From d7193dfa3469540330b6db0698e26db14bb7b781 Mon Sep 17 00:00:00 2001
From: Hongming Wang <hongmingwang.rabbit@users.noreply.github.com>
Date: Tue, 21 Apr 2026 04:34:11 -0700
Subject: [PATCH] feat(e2e): pivot to admin-bearer-only auth + add sanity
 self-check workflow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reduces required secret surface from 2 (session cookie + admin token)
to 1 (admin token). Pairs with molecule-controlplane#202 which adds:
  - POST /cp/admin/orgs    — server-to-server org creation
  - GET /cp/admin/orgs/:slug/admin-token — per-tenant bearer fetch

With those endpoints live, CI doesn't need to scrape a browser WorkOS
session cookie. CP admin bearer (Railway CP_ADMIN_API_TOKEN) drives
provision + tenant-token retrieval + teardown through a single
credential.

Changes
-------
  test_staging_full_saas.sh: admin bearer for provision/teardown,
    fetched per-tenant token drives all tenant API calls. Added
    E2E_INTENTIONAL_FAILURE=1 toggle that poisons the tenant token
    after provisioning so the teardown path gets exercised when the
    happy-path isn't.

  canvas/e2e/staging-setup.ts: same pivot; exports STAGING_TENANT_TOKEN
    instead of STAGING_SESSION_COOKIE.
  canvas/e2e/staging-tabs.spec.ts: context.setExtraHTTPHeaders with
    Authorization: Bearer on every page request, no cookie handling.

  All three workflows (e2e-staging-saas, canary-staging,
    e2e-staging-canvas): drop MOLECULE_STAGING_SESSION_COOKIE env +
    verification step. One secret to set.

  NEW e2e-staging-sanity.yml: weekly Mon 06:00 UTC. Runs the harness
    with E2E_INTENTIONAL_FAILURE=1 and inverts the pass condition —
    rc=1 is green, rc=0 (unexpected success) or rc=4 (leak) open a
    priority-high issue labelled e2e-safety-net. This is the
    answer to 'how do we know the teardown path still works when
    nothing else has failed recently.'

STAGING_SAAS_E2E.md refreshed: single-secret setup, sanity workflow
documented, canvas workflow added to the coverage matrix.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .github/workflows/canary-staging.yml     |   7 +-
 .github/workflows/e2e-staging-canvas.yml |   7 +-
 .github/workflows/e2e-staging-saas.yml   |  14 +-
 .github/workflows/e2e-staging-sanity.yml | 152 +++++++++++++++
 canvas/e2e/staging-setup.ts              | 131 ++++++-------
 canvas/e2e/staging-tabs.spec.ts          | 113 +++++------
 tests/e2e/STAGING_SAAS_E2E.md            | 120 +++++++-----
 tests/e2e/test_staging_full_saas.sh      | 235 ++++++++++-------------
 8 files changed, 450 insertions(+), 329 deletions(-)
 create mode 100644 .github/workflows/e2e-staging-sanity.yml

diff --git a/.github/workflows/canary-staging.yml b/.github/workflows/canary-staging.yml
index 8036b855..c5374df2 100644
--- a/.github/workflows/canary-staging.yml
+++ b/.github/workflows/canary-staging.yml
@@ -42,7 +42,6 @@ jobs:
 
     env:
       MOLECULE_CP_URL: https://staging-api.moleculesai.app
-      MOLECULE_SESSION_COOKIE: ${{ secrets.MOLECULE_STAGING_SESSION_COOKIE }}
       MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
       E2E_MODE: canary
       E2E_RUNTIME: hermes
@@ -51,10 +50,10 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      - name: Verify required secrets
+      - name: Verify admin token present
         run: |
-          if [ -z "$MOLECULE_SESSION_COOKIE" ] || [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
-            echo "::error::Canary secrets missing — set MOLECULE_STAGING_SESSION_COOKIE and MOLECULE_STAGING_ADMIN_TOKEN"
+          if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
+            echo "::error::MOLECULE_STAGING_ADMIN_TOKEN not set"
             exit 2
           fi
 
diff --git a/.github/workflows/e2e-staging-canvas.yml b/.github/workflows/e2e-staging-canvas.yml
index e5347fec..e3c667cf 100644
--- a/.github/workflows/e2e-staging-canvas.yml
+++ b/.github/workflows/e2e-staging-canvas.yml
@@ -39,7 +39,6 @@ jobs:
     env:
       CANVAS_E2E_STAGING: '1'
       MOLECULE_CP_URL: https://staging-api.moleculesai.app
-      MOLECULE_SESSION_COOKIE: ${{ secrets.MOLECULE_STAGING_SESSION_COOKIE }}
       MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
 
     defaults:
@@ -49,10 +48,10 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      - name: Verify required secrets
+      - name: Verify admin token present
         run: |
-          if [ -z "$MOLECULE_SESSION_COOKIE" ] || [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
-            echo "::error::Missing MOLECULE_STAGING_SESSION_COOKIE or MOLECULE_STAGING_ADMIN_TOKEN"
+          if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
+            echo "::error::Missing MOLECULE_STAGING_ADMIN_TOKEN"
             exit 2
           fi
 
diff --git a/.github/workflows/e2e-staging-saas.yml b/.github/workflows/e2e-staging-saas.yml
index 5f776664..1780b72d 100644
--- a/.github/workflows/e2e-staging-saas.yml
+++ b/.github/workflows/e2e-staging-saas.yml
@@ -74,9 +74,9 @@ jobs:
 
     env:
       MOLECULE_CP_URL: https://staging-api.moleculesai.app
-      # Secrets referenced here must be configured in
+      # Single admin-bearer secret drives provision + tenant-token
+      # retrieval + teardown. Configure in
       # Settings → Secrets and variables → Actions → Repository secrets.
-      MOLECULE_SESSION_COOKIE: ${{ secrets.MOLECULE_STAGING_SESSION_COOKIE }}
       MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
       E2E_RUNTIME: ${{ github.event.inputs.runtime || 'hermes' }}
       E2E_RUN_ID: "${{ github.run_id }}-${{ github.run_attempt }}"
@@ -85,17 +85,13 @@ jobs:
     steps:
       - uses: actions/checkout@v4
 
-      - name: Verify required secrets
+      - name: Verify admin token present
         run: |
-          if [ -z "$MOLECULE_SESSION_COOKIE" ]; then
-            echo "::error::MOLECULE_STAGING_SESSION_COOKIE secret not set"
-            exit 2
-          fi
           if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
-            echo "::error::MOLECULE_STAGING_ADMIN_TOKEN secret not set"
+            echo "::error::MOLECULE_STAGING_ADMIN_TOKEN secret not set (Railway staging CP_ADMIN_API_TOKEN)"
             exit 2
           fi
-          echo "Secrets present ✓"
+          echo "Admin token present ✓"
 
       - name: CP staging health preflight
         run: |
diff --git a/.github/workflows/e2e-staging-sanity.yml b/.github/workflows/e2e-staging-sanity.yml
new file mode 100644
index 00000000..f20f628b
--- /dev/null
+++ b/.github/workflows/e2e-staging-sanity.yml
@@ -0,0 +1,152 @@
+name: E2E Staging Sanity (leak-detection self-check)
+
+# Periodic assertion that the teardown safety nets in e2e-staging-saas
+# and canary-staging actually work. Runs the E2E harness with
+# E2E_INTENTIONAL_FAILURE=1, which poisons the tenant admin token after
+# the org is provisioned. The workspace-provision step then fails, the
+# script exits non-zero, and the EXIT trap + workflow always()-step
+# must still tear down cleanly.
+#
+# A green run means:
+#   - The script exited non-zero (intentional failure caught)
+#   - The trap fired teardown
+#   - The leak-detection poll found zero orphan orgs
+#
+# A red run means the teardown path itself is broken — act on this the
+# same way you'd act on a canary failure (the whole E2E safety net is
+# compromised until it's fixed).
+#
+# Cadence: once a week, Monday 06:00 UTC. Drift-slow, not per-PR — the
+# teardown path rarely changes, and a weekly heartbeat is enough to
+# catch silent regressions in cleanup code paths.
+
+on:
+  schedule:
+    - cron: '0 6 * * 1'
+  workflow_dispatch:
+
+concurrency:
+  # Shares the group with canary + full so they don't collide on
+  # staging org-create quota.
+  group: e2e-staging-sanity
+  cancel-in-progress: false
+
+permissions:
+  issues: write
+  contents: read
+
+jobs:
+  sanity:
+    name: Intentional-failure teardown sanity
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+
+    env:
+      MOLECULE_CP_URL: https://staging-api.moleculesai.app
+      MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
+      E2E_MODE: canary            # lean lifecycle; we only need the org to exist
+      E2E_RUNTIME: hermes
+      E2E_RUN_ID: "sanity-${{ github.run_id }}"
+      E2E_INTENTIONAL_FAILURE: "1"
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Verify admin token present
+        run: |
+          if [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
+            echo "::error::MOLECULE_STAGING_ADMIN_TOKEN not set"
+            exit 2
+          fi
+
+      # Inverted assertion: the run MUST fail. If it passes, the
+      # E2E_INTENTIONAL_FAILURE path is broken (token not being
+      # poisoned correctly, or the harness silently recovered).
+      - name: Run harness — expecting exit !=0
+        id: harness
+        run: |
+          set +e
+          bash tests/e2e/test_staging_full_saas.sh
+          rc=$?
+          echo "harness_rc=$rc" >> "$GITHUB_OUTPUT"
+          # The only acceptable outcomes:
+          #   1 — harness failed mid-run, teardown ran, leak-check passed
+          #   (exit 4 means teardown left a leak — that's the real bug
+          #    this sanity check exists to catch)
+          if [ "$rc" = "1" ]; then
+            echo "✓ Harness failed as expected (rc=1); teardown trap ran, leak-check passed"
+            exit 0
+          elif [ "$rc" = "0" ]; then
+            echo "::error::Harness succeeded under E2E_INTENTIONAL_FAILURE=1 — the poisoning path is broken"
+            exit 1
+          elif [ "$rc" = "4" ]; then
+            echo "::error::LEAK DETECTED (rc=4) — teardown failed to clean up the org. Safety net broken."
+            exit 4
+          else
+            echo "::error::Unexpected rc=$rc — neither clean-failure nor leak. Investigate harness."
+            exit 1
+          fi
+
+      - name: Open issue if safety net is broken
+        if: failure()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const title = "🚨 E2E teardown safety net broken";
+            const runURL = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
+            const body =
+              `The weekly sanity run (E2E_INTENTIONAL_FAILURE=1) did not exit ` +
+              `as expected. This means one of:\n` +
+              `  - poisoning didn't actually cause failure (test harness regression), OR\n` +
+              `  - teardown left an orphan org (leak detection caught a real bug)\n\n` +
+              `Run: ${runURL}\n\n` +
+              `This is higher priority than a canary failure — the whole ` +
+              `E2E safety net can't be trusted until this is resolved.`;
+
+            const { data: existing } = await github.rest.issues.listForRepo({
+              owner: context.repo.owner, repo: context.repo.repo,
+              state: 'open', labels: 'e2e-safety-net',
+            });
+            const match = existing.find(i => i.title === title);
+            if (match) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner, repo: context.repo.repo,
+                issue_number: match.number,
+                body: `Still broken. ${runURL}`,
+              });
+            } else {
+              await github.rest.issues.create({
+                owner: context.repo.owner, repo: context.repo.repo,
+                title, body,
+                labels: ['e2e-safety-net', 'bug', 'priority-high'],
+              });
+            }
+
+      # Belt-and-braces: if teardown left anything behind, nuke it here
+      # so we don't bleed staging quota. Different label from the
+      # always()-steps in the other workflows so sanity-only orgs get
+      # cleaned up by sanity runs.
+      - name: Teardown safety net
+        if: always()
+        env:
+          ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
+        run: |
+          set +e
+          orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
+            -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
+            | python3 -c "
+          import json, sys
+          d = json.load(sys.stdin)
+          today = __import__('datetime').date.today().strftime('%Y%m%d')
+          candidates = [o['slug'] for o in d.get('orgs', [])
+                        if o.get('slug','').startswith(f'e2e-canary-{today}-sanity-')
+                        and o.get('status') not in ('purged',)]
+          print('\n'.join(candidates))
+          " 2>/dev/null)
+          for slug in $orgs; do
+            curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
+              -H "Authorization: Bearer $ADMIN_TOKEN" \
+              -H "Content-Type: application/json" \
+              -d "{\"confirm_token\":\"$slug\"}" >/dev/null || true
+          done
+          exit 0
diff --git a/canvas/e2e/staging-setup.ts b/canvas/e2e/staging-setup.ts
index 1850a426..598fb877 100644
--- a/canvas/e2e/staging-setup.ts
+++ b/canvas/e2e/staging-setup.ts
@@ -1,25 +1,21 @@
 /**
  * Playwright global setup for the staging canvas E2E.
  *
- * Provisions a fresh staging org per test run (via POST /cp/orgs against
- * staging CP), waits for the tenant EC2 + cloudflared tunnel + TLS
- * propagation, provisions one hermes workspace on the new tenant, waits
- * for it to reach status=online, then exports:
+ * Provisions a fresh staging org per run (POST /cp/admin/orgs), fetches
+ * the per-tenant admin token, provisions one hermes workspace, waits
+ * for online, then exports:
  *
- *   STAGING_TENANT_URL    — https://<slug>.moleculesai.app
- *   STAGING_WORKSPACE_ID  — UUID of the provisioned hermes workspace
- *   STAGING_SLUG          — org slug (for teardown)
+ *   STAGING_TENANT_URL     https://<slug>.moleculesai.app
+ *   STAGING_WORKSPACE_ID   UUID of the hermes workspace
+ *   STAGING_TENANT_TOKEN   per-tenant admin bearer (for spec requests)
+ *   STAGING_SLUG           org slug (used by teardown)
  *
- * staging-teardown.ts consumes STAGING_SLUG to DELETE the org.
- *
- * Required env (set via GH Actions secrets in the workflow):
- *   MOLECULE_CP_URL           default: https://staging-api.moleculesai.app
- *   MOLECULE_SESSION_COOKIE   WorkOS session for the staging test user
- *   MOLECULE_ADMIN_TOKEN      CP admin bearer for teardown (unused in setup
- *                             but checked here so both halves fail fast)
- *
- * Runs only when CANVAS_E2E_STAGING=1 so local `pnpm playwright test` in
- * dev doesn't try to provision against staging by accident.
+ * Required env:
+ *   MOLECULE_CP_URL        default: https://staging-api.moleculesai.app
+ *   MOLECULE_ADMIN_TOKEN   CP admin bearer (Railway staging
+ *                          CP_ADMIN_API_TOKEN). Drives provision +
+ *                          tenant-token retrieval + teardown via a
+ *                          single credential.
  */
 
 import type { FullConfig } from "@playwright/test";
@@ -27,11 +23,10 @@ import { writeFileSync } from "fs";
 import { join } from "path";
 
 const CP_URL = process.env.MOLECULE_CP_URL || "https://staging-api.moleculesai.app";
-const SESSION = process.env.MOLECULE_SESSION_COOKIE;
 const ADMIN_TOKEN = process.env.MOLECULE_ADMIN_TOKEN;
 const STAGING = process.env.CANVAS_E2E_STAGING === "1";
 
-const PROVISION_TIMEOUT_MS = 15 * 60 * 1000; // 15 min cold-boot budget
+const PROVISION_TIMEOUT_MS = 15 * 60 * 1000;
 const WORKSPACE_ONLINE_TIMEOUT_MS = 10 * 60 * 1000;
 const TLS_TIMEOUT_MS = 3 * 60 * 1000;
 
@@ -41,10 +36,7 @@ async function jsonFetch(
 ): Promise<{ status: number; body: any }> {
   const res = await fetch(url, {
     ...init,
-    headers: {
-      "Content-Type": "application/json",
-      ...(init.headers || {}),
-    },
+    headers: { "Content-Type": "application/json", ...(init.headers || {}) },
   });
   let body: any = null;
   try {
@@ -71,8 +63,6 @@ async function waitFor<T>(
 }
 
 function makeSlug(): string {
-  // Matches CP's ^[a-z][a-z0-9-]{2,31}$. The "e2e-" prefix lets auto-cleanup
-  // crons grep-find leftovers from crashed runs.
   const y = new Date().toISOString().slice(0, 10).replace(/-/g, "");
   const rand = Math.random().toString(36).slice(2, 8);
   return `e2e-canvas-${y}-${rand}`.slice(0, 32);
@@ -83,67 +73,65 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
     console.log("[staging-setup] CANVAS_E2E_STAGING not set, skipping");
     return;
   }
-
-  if (!SESSION) {
-    throw new Error("MOLECULE_SESSION_COOKIE required for staging E2E");
-  }
   if (!ADMIN_TOKEN) {
     throw new Error(
-      "MOLECULE_ADMIN_TOKEN required for staging E2E (teardown needs it)",
+      "MOLECULE_ADMIN_TOKEN required (Railway staging CP_ADMIN_API_TOKEN)",
     );
   }
 
   const slug = makeSlug();
-  const cookieHeader = `molecule_cp_session=${SESSION}`;
+  const adminAuth = { Authorization: `Bearer ${ADMIN_TOKEN}` };
   console.log(`[staging-setup] Using slug=${slug}`);
 
-  // 1. Accept terms (idempotent — already-accepted returns 2xx or 400)
-  await jsonFetch(`${CP_URL}/cp/auth/accept-terms`, {
+  // 1. Create org via admin endpoint — no WorkOS session needed
+  const create = await jsonFetch(`${CP_URL}/cp/admin/orgs`, {
     method: "POST",
-    headers: { Cookie: cookieHeader },
-    body: JSON.stringify({}),
-  }).catch(() => {
-    /* best-effort */
-  });
-
-  // 2. Create org
-  const create = await jsonFetch(`${CP_URL}/cp/orgs`, {
-    method: "POST",
-    headers: { Cookie: cookieHeader },
-    body: JSON.stringify({ slug, name: `E2E Canvas ${slug}` }),
+    headers: adminAuth,
+    body: JSON.stringify({
+      slug,
+      name: `E2E Canvas ${slug}`,
+      owner_user_id: `e2e-runner:${slug}`,
+    }),
   });
   if (create.status >= 400) {
     throw new Error(
-      `POST /cp/orgs returned ${create.status}: ${JSON.stringify(create.body)}`,
+      `POST /cp/admin/orgs ${create.status}: ${JSON.stringify(create.body)}`,
     );
   }
   console.log(`[staging-setup] Org created: ${slug}`);
 
-  // 3. Wait for tenant provision (status=running)
-  const finalStatus = await waitFor<{ url?: string; status: string }>(
+  // 2. Wait for tenant running (admin-orgs list is the status source)
+  await waitFor<boolean>(
     async () => {
-      const r = await jsonFetch(
-        `${CP_URL}/cp/orgs/${slug}/provision-status`,
-        { headers: { Cookie: cookieHeader } },
-      );
+      const r = await jsonFetch(`${CP_URL}/cp/admin/orgs`, { headers: adminAuth });
       if (r.status !== 200) return null;
-      if (r.body?.status === "running") return r.body;
-      if (r.body?.status === "failed") {
-        throw new Error(`Provisioning failed: ${JSON.stringify(r.body)}`);
-      }
+      const row = (r.body?.orgs || []).find((o: any) => o.slug === slug);
+      if (!row) return null;
+      if (row.status === "running") return true;
+      if (row.status === "failed") throw new Error(`provision failed: ${slug}`);
       return null;
     },
     PROVISION_TIMEOUT_MS,
     15_000,
     "tenant provision",
   );
+  console.log(`[staging-setup] Tenant running`);
 
-  const tenantURL =
-    finalStatus.url ||
-    `https://${slug}.${CP_URL.includes("staging") ? "moleculesai.app" : "moleculesai.app"}`;
+  // 3. Fetch per-tenant admin token
+  const tokRes = await jsonFetch(
+    `${CP_URL}/cp/admin/orgs/${slug}/admin-token`,
+    { headers: adminAuth },
+  );
+  if (tokRes.status !== 200 || !tokRes.body?.admin_token) {
+    throw new Error(
+      `tenant-token fetch ${tokRes.status}: ${JSON.stringify(tokRes.body)}`,
+    );
+  }
+  const tenantToken: string = tokRes.body.admin_token;
+  const tenantURL = `https://${slug}.moleculesai.app`;
   console.log(`[staging-setup] Tenant URL: ${tenantURL}`);
 
-  // 4. Wait for tenant TLS readiness
+  // 4. TLS readiness
   await waitFor<boolean>(
     async () => {
       try {
@@ -160,10 +148,11 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
     "tenant TLS",
   );
 
-  // 5. Provision one hermes workspace (cheapest, fastest-booting)
+  // 5. Provision workspace
+  const tenantAuth = { Authorization: `Bearer ${tenantToken}` };
   const ws = await jsonFetch(`${tenantURL}/workspaces`, {
     method: "POST",
-    headers: { Cookie: cookieHeader },
+    headers: tenantAuth,
     body: JSON.stringify({
       name: "E2E Canvas Test",
       runtime: "hermes",
@@ -172,9 +161,7 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
     }),
   });
   if (ws.status >= 400 || !ws.body?.id) {
-    throw new Error(
-      `Workspace create failed (${ws.status}): ${JSON.stringify(ws.body)}`,
-    );
+    throw new Error(`Workspace create ${ws.status}: ${JSON.stringify(ws.body)}`);
   }
   const workspaceId = ws.body.id as string;
   console.log(`[staging-setup] Workspace created: ${workspaceId}`);
@@ -183,14 +170,12 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
   await waitFor<boolean>(
     async () => {
       const r = await jsonFetch(`${tenantURL}/workspaces/${workspaceId}`, {
-        headers: { Cookie: cookieHeader },
+        headers: tenantAuth,
       });
       if (r.status !== 200) return null;
       if (r.body?.status === "online") return true;
       if (r.body?.status === "failed") {
-        throw new Error(
-          `Workspace ${workspaceId} failed: ${r.body.last_sample_error || ""}`,
-        );
+        throw new Error(`Workspace failed: ${r.body.last_sample_error || ""}`);
       }
       return null;
     },
@@ -200,19 +185,15 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
   );
   console.log(`[staging-setup] Workspace online`);
 
-  // 7. Export via a state file so staging-teardown and the test spec can
-  //    pick up the same slug / urls. Playwright's global setup can't
-  //    export env to the test subprocess directly in all configurations.
+  // 7. Hand state off to tests + teardown
   const stateFile = join(process.cwd(), ".playwright-staging-state.json");
   writeFileSync(
     stateFile,
-    JSON.stringify({ slug, tenantURL, workspaceId }, null, 2),
+    JSON.stringify({ slug, tenantURL, workspaceId, tenantToken }, null, 2),
   );
-  // Also set env for in-process test reads.
   process.env.STAGING_SLUG = slug;
   process.env.STAGING_TENANT_URL = tenantURL;
   process.env.STAGING_WORKSPACE_ID = workspaceId;
-  process.env.STAGING_SESSION_COOKIE = SESSION;
-
+  process.env.STAGING_TENANT_TOKEN = tenantToken;
   console.log(`[staging-setup] Ready — ${stateFile}`);
 }
diff --git a/canvas/e2e/staging-tabs.spec.ts b/canvas/e2e/staging-tabs.spec.ts
index 6e8b5d9c..412953a5 100644
--- a/canvas/e2e/staging-tabs.spec.ts
+++ b/canvas/e2e/staging-tabs.spec.ts
@@ -3,20 +3,24 @@
  * fresh staging org provisioned in the global setup. Asserts each tab
  * renders without throwing and captures a screenshot for visual review.
  *
- * Relies on `staging-setup.ts` to provision a tenant org, provision one
- * hermes workspace on it, and hand us a tenant URL + workspace id via
- * env (set by the setup file before tests run). Global teardown tears
- * down the org.
+ * Auth model: the tenant platform's AdminAuth middleware accepts a bearer
+ * token OR a WorkOS session cookie. Playwright can't mint a WorkOS
+ * session, so we feed the per-tenant admin token (fetched in global
+ * setup via GET /cp/admin/orgs/:slug/admin-token) as an Authorization:
+ * Bearer header via context.setExtraHTTPHeaders(). Every browser
+ * request inherits the header.
  *
- * Runs only when CANVAS_E2E_STAGING=1 — tests are skipped in local dev
- * where the prerequisite env isn't set.
+ * Known SaaS gaps — documented in #1369 and allowed to render errored
+ * content without failing the test (the gate is "no hard crash, no
+ * 'Failed to load' toast"):
+ *   - Files tab: empty (platform can't docker exec into a remote EC2)
+ *   - Terminal tab: WS connect fails
+ *   - Peers tab: 401 without workspace-scoped token
  */
 
 import { test, expect } from "@playwright/test";
 
 // Tab ids as declared in canvas/src/components/SidePanel.tsx TABS.
-// Kept duplicated here (not imported) because Playwright tests run outside
-// the Next.js bundler and can't import from @/components paths.
 const TAB_IDS = [
   "chat",
   "activity",
@@ -43,32 +47,21 @@ test.describe("staging canvas tabs", () => {
     context,
   }) => {
     const tenantURL = process.env.STAGING_TENANT_URL;
-    const sessionCookie = process.env.STAGING_SESSION_COOKIE;
+    const tenantToken = process.env.STAGING_TENANT_TOKEN;
     const workspaceId = process.env.STAGING_WORKSPACE_ID;
 
-    if (!tenantURL || !sessionCookie || !workspaceId) {
+    if (!tenantURL || !tenantToken || !workspaceId) {
       throw new Error(
-        "staging-setup.ts did not export STAGING_TENANT_URL / STAGING_SESSION_COOKIE / STAGING_WORKSPACE_ID — did global setup run?",
+        "staging-setup.ts did not export STAGING_TENANT_URL / STAGING_TENANT_TOKEN / STAGING_WORKSPACE_ID — did global setup run?",
       );
     }
 
-    // The session cookie was minted by CP at sign-in; canvas on the tenant
-    // subdomain shares it via the parent-domain scope (.moleculesai.app).
-    // Playwright needs both the cookie and the cross-domain visibility.
-    const url = new URL(tenantURL);
-    await context.addCookies([
-      {
-        name: "molecule_cp_session",
-        value: sessionCookie,
-        // Leading dot → valid on all subdomains. The staging WorkOS auth
-        // flow sets it this way, so we mirror.
-        domain: "." + url.hostname.replace(/^[^.]+\./, ""),
-        path: "/",
-        httpOnly: true,
-        secure: true,
-        sameSite: "Lax",
-      },
-    ]);
+    // Attach the per-tenant admin bearer to every outbound request.
+    // The tenant platform's AdminAuth middleware accepts this; no
+    // WorkOS session needed.
+    await context.setExtraHTTPHeaders({
+      Authorization: `Bearer ${tenantToken}`,
+    });
 
     const consoleErrors: string[] = [];
     page.on("console", (msg) => {
@@ -79,12 +72,13 @@ test.describe("staging canvas tabs", () => {
 
     await page.goto(tenantURL, { waitUntil: "networkidle" });
 
-    // Canvas hydration races WebSocket connect + /workspaces fetch. Wait
-    // for the workspace node selector or the hydration-error banner —
-    // whichever wins first.
-    await page.waitForSelector('[role="tablist"], [data-testid="hydration-error"]', {
-      timeout: 45_000,
-    });
+    // Canvas hydration races WebSocket connect + /workspaces fetch.
+    // Wait for the tablist element (appears after a workspace is
+    // selected) or the hydration-error banner — whichever wins first.
+    await page.waitForSelector(
+      '[role="tablist"], [data-testid="hydration-error"]',
+      { timeout: 45_000 },
+    );
 
     const hydrationErr = await page
       .locator('[data-testid="hydration-error"]')
@@ -94,20 +88,19 @@ test.describe("staging canvas tabs", () => {
       "canvas hydration failed — check staging CP + tenant reachability",
     ).toBe(0);
 
-    // Click the workspace node to open the side panel. The node's
-    // accessible name is the workspace display name; we match by id attr
-    // to avoid coupling to the display name which tests can't know.
-    const node = page.locator(`[data-workspace-id="${workspaceId}"]`).first();
-    // Fallback: click by role if the data attribute isn't wired
-    if ((await node.count()) === 0) {
-      // Try clicking the first workspace card visible
-      const firstNode = page.locator('[role="button"][aria-label*="Workspace"]').first();
-      await firstNode.click({ timeout: 10_000 });
+    // Click the workspace node to open the side panel. Try a data
+    // attribute first, fall back to a generic role-based selector so
+    // the test doesn't break when the node-card markup changes.
+    const byDataAttr = page.locator(`[data-workspace-id="${workspaceId}"]`).first();
+    if ((await byDataAttr.count()) > 0) {
+      await byDataAttr.click({ timeout: 10_000 });
     } else {
-      await node.click({ timeout: 10_000 });
+      const firstNode = page
+        .locator('[role="button"][aria-label*="Workspace" i]')
+        .first();
+      await firstNode.click({ timeout: 10_000 });
     }
 
-    // Wait for the side panel tablist to mount
     await page.waitForSelector('[role="tablist"]', { timeout: 15_000 });
 
     for (const tabId of TAB_IDS) {
@@ -120,23 +113,17 @@ test.describe("staging canvas tabs", () => {
         await tabButton.click();
 
         const panel = page.locator(`#panel-${tabId}`);
-        await expect(
-          panel,
-          `panel for ${tabId} never rendered`,
-        ).toBeVisible({ timeout: 10_000 });
+        await expect(panel, `panel for ${tabId} never rendered`).toBeVisible({
+          timeout: 10_000,
+        });
 
-        // No toast-style error banner should appear for a healthy workspace.
-        // Known exceptions: terminal may 4xx on SaaS cross-EC2 (WS target
-        // unreachable), peers may 401 without workspace token. Those are
-        // reported separately in issue #1369; here we just guard against
-        // hard crashes (toast with "Error" keyword).
+        // "Failed to load" toast = hard crash. Known SaaS-mode gaps
+        // (Files empty, Terminal disconnected, Peers 401) surface as
+        // in-panel content, not toasts.
         const errorToasts = await page
           .locator('[role="alert"]:has-text("Failed to load")')
           .count();
-        expect(
-          errorToasts,
-          `tab ${tabId}: saw "Failed to load" toast`,
-        ).toBe(0);
+        expect(errorToasts, `tab ${tabId}: "Failed to load" toast`).toBe(0);
 
         await page.screenshot({
           path: `test-results/staging-tab-${tabId}.png`,
@@ -145,14 +132,16 @@ test.describe("staging canvas tabs", () => {
       });
     }
 
-    // Aggregate console-error check. Allow a small budget for known-noisy
-    // Sentry/Vercel analytics errors that don't reflect app health.
+    // Aggregate console-error budget. Known-noisy sources whitelisted:
+    // Sentry, Vercel analytics, WS reconnects (expected on SaaS
+    // terminal), favicon 404 (cosmetic).
     const appErrors = consoleErrors.filter(
       (msg) =>
         !msg.includes("sentry") &&
         !msg.includes("vercel") &&
-        !msg.includes("WebSocket") && // WS failures ≠ app failures
-        !msg.includes("favicon"),
+        !msg.includes("WebSocket") &&
+        !msg.includes("favicon") &&
+        !msg.includes("molecule-icon.png"), // another cosmetic 404
     );
     expect(
       appErrors,
diff --git a/tests/e2e/STAGING_SAAS_E2E.md b/tests/e2e/STAGING_SAAS_E2E.md
index 11d1c973..dd4e3095 100644
--- a/tests/e2e/STAGING_SAAS_E2E.md
+++ b/tests/e2e/STAGING_SAAS_E2E.md
@@ -1,63 +1,76 @@
-# Staging full-SaaS E2E
+# Staging SaaS E2E — runbook
 
-`tests/e2e/test_staging_full_saas.sh` provisions a fresh org per run, exercises the workspace lifecycle end-to-end, then tears the org down and asserts leak-free. Runs in CI via `.github/workflows/e2e-staging-saas.yml`.
+Four workflows + a shared bash harness that together cover the SaaS stack end to end against live staging. Every workflow provisions a fresh org per run and tears it down; leaks are CI failures.
 
-## What it covers
+## Coverage
 
-| Step | What it verifies |
+| Workflow | Cadence | Wall time | Scope |
+|---|---|---|---|
+| `e2e-staging-saas.yml` | push + nightly 07:00 UTC | ~20 min | Full API: org → tenant → 2 workspaces → A2A → HMA → delegation → leak check |
+| `canary-staging.yml` | every 30 min | ~8 min | Minimum smoke + self-managed alert issue |
+| `e2e-staging-canvas.yml` | push + weekly Sunday 08:00 | ~25 min | All 13 canvas workspace-panel tabs via Playwright |
+| `e2e-staging-sanity.yml` | weekly Monday 06:00 | ~10 min | Intentional-failure: teardown safety-net self-check |
+
+`tests/e2e/test_staging_full_saas.sh` is the shared harness all workflows invoke (with `E2E_MODE={full|canary}` and `E2E_INTENTIONAL_FAILURE={0|1}` toggles).
+
+### Full-SaaS checklist (sections)
+
+| # | What |
 |---|---|
-| 1. Accept terms (POST `/cp/auth/accept-terms`) | Session cookie valid, ToS gate honours idempotent replay |
-| 2. Create org (POST `/cp/orgs`) | Slug validation, member insert, billing gate, quota |
-| 3. Wait for provisioning | CP tenant EC2 boot + cloudflared tunnel + DNS + TLS (~5–10 min cold) |
-| 4. Tenant health (GET `/health` on new tenant URL) | Cert chain OK, TenantGuard + session-auth wired |
-| 5. Provision parent workspace | SaaS provision path (CP RunInstances, EC2 bootstrap, runtime register) |
-| 6. Provision child workspace under parent | `parent_id` relationship, team-hierarchy |
-| 7. Wait both online | Workspace sweeper + register handler + token bootstrap |
-| 8. A2A round-trip (POST `/workspaces/:id/a2a`) | Full LLM loop — registration, MCP tools, provider auth, response shape |
-| 9. HMA memory write+read | `/memories` scope routing, awareness namespace, persistence |
-| 9b. Peers + activity smoke | Route registration + activity-log write path |
-| 10. Teardown | `DELETE /cp/admin/tenants/:slug` + leak assertion |
+| 0 | CP preflight |
+| 1 | `POST /cp/admin/orgs` — org create without WorkOS session |
+| 2 | Wait for tenant status = running |
+| 3 | `GET /cp/admin/orgs/:slug/admin-token` — fetch per-tenant bearer |
+| 4 | Tenant TLS readiness on `/health` |
+| 5 | Provision parent workspace |
+| 6 | Provision child workspace (full mode) |
+| 7 | Wait both online |
+| 8 | A2A round-trip on parent — expect agent response |
+| 9 | HMA memory write + read, peers smoke, activity log (full mode) |
+| 10 | Delegation mechanics: parent → child via proxy + activity assertion (full mode) |
+| 11 | EXIT trap — teardown + leak detection |
 
-If any step fails, the EXIT trap tears down the org anyway.
+### Canvas tabs
 
-## Required GitHub Actions secrets
+Opens all 13 workspace-panel tabs against the freshly-provisioned org:
 
-Both are at **Settings → Secrets and variables → Actions → Repository secrets**:
+```
+chat, activity, details, skills, terminal, config, schedule,
+channels, files, memory, traces, events, audit
+```
 
-### `MOLECULE_STAGING_SESSION_COOKIE`
+Per tab: visible, panel renders, no "Failed to load" toast, screenshot captured. Known SaaS-mode gaps (Files empty, Terminal disconnect, Peers 401) are whitelisted — see issue #1369.
 
-A valid `molecule_cp_session` cookie for a **test user** that:
+### Sanity self-check
 
-- is on the staging beta allowlist (or `BETA_GATE_ENABLED=false` on staging)
-- has already accepted the current terms version (the script re-accepts idempotently but can't bootstrap from unaccepted)
-- has under-quota owned orgs
+Runs the harness with `E2E_INTENTIONAL_FAILURE=1`, which poisons the tenant admin token after the org is provisioned. The workspace-provision step then fails and the script exits non-zero; the EXIT trap + teardown + leak assertion must still run clean. If they don't, the sanity workflow files a `priority-high` issue with label `e2e-safety-net`.
 
-**How to extract:**
+## Required secret (exactly one)
 
-1. In an incognito window, sign in at `https://staging-api.moleculesai.app/cp/auth/login` with the test user.
-2. DevTools → Application → Cookies → `https://staging-api.moleculesai.app`
-3. Copy the `molecule_cp_session` value (base64-looking blob).
-4. Paste as the secret value. Do not include the `molecule_cp_session=` prefix.
-
-**Rotation:** WorkOS sessions don't expire until the user signs out or the refresh token revokes. A 90-day rotation schedule is safe.
+Set in **Settings → Secrets and variables → Actions → Repository secrets**:
 
 ### `MOLECULE_STAGING_ADMIN_TOKEN`
 
-The `CP_ADMIN_API_TOKEN` env var currently set on the Railway **staging** molecule-platform → controlplane service.
-
-**How to extract:**
+The `CP_ADMIN_API_TOKEN` env currently set on the Railway staging molecule-platform → controlplane service.
 
 ```
-railway variables --service controlplane --environment staging --kv | grep CP_ADMIN_API_TOKEN
+railway variables --environment staging --service controlplane --kv | grep CP_ADMIN_API_TOKEN
 ```
 
-Used exclusively for teardown (`DELETE /cp/admin/tenants/:slug`) and leak detection (`GET /cp/admin/orgs`). Write access, treat like prod admin.
+This **one** secret drives everything:
+
+- `POST /cp/admin/orgs` — provision org (no WorkOS session needed)
+- `GET /cp/admin/orgs/:slug/admin-token` — fetch per-tenant bearer
+- `DELETE /cp/admin/tenants/:slug` — teardown
+- `GET /cp/admin/orgs` — leak detection post-teardown
+
+The per-tenant admin token (short-lived, per-org) drives every tenant-side call (`POST /workspaces`, `/memories`, `/a2a`, etc.).
+
+**No WorkOS session cookie needed** — admin endpoints bypass session auth via `AdminGate` (bearer + rate-limit only). CI provision + teardown collapse to one credential.
 
 ## Running locally
 
 ```
-export MOLECULE_CP_URL=https://staging-api.moleculesai.app
-export MOLECULE_SESSION_COOKIE="…"
 export MOLECULE_ADMIN_TOKEN="…"
 # Optional: keep the org for post-mortem inspection
 export E2E_KEEP_ORG=1
@@ -68,14 +81,29 @@ bash tests/e2e/test_staging_full_saas.sh
 
 ## Cost
 
-- Full run: ~20 min wall clock
-- Compute: ~12 min of t3.small tenant EC2 + ~4 min of per-workspace EC2 × 2 = ~20 t3.small-minutes ≈ **$0.007/run**
-- Daily (nightly cron + PR runs ≈ 5/day): **~$0.04/day**
-- Hard timeout (30 min workflow timeout + per-request curl timeouts) caps runaway cost
+- Full run: ~20 min, ~$0.007
+- Canary (48/day): ~$0.06/day
+- Canvas (few/week): ~$0.01/day
+- Sanity (weekly): ~$0.002/week
+- **Total staging burn: < $0.15/day** at expected CI load
 
-## Known gaps (follow-ups)
+Hard per-workflow timeouts (15–40 min) cap runaway cost. Three teardown layers:
 
-- Canvas UI tabs not covered — separate Playwright workflow in `e2e-staging-canvas.yml` (todo)
-- Delegation end-to-end (parent calls `delegate_task` MCP tool against child) — not in this run because it needs a real LLM loop and doubles runtime cost
-- Claude Code runtime test — currently only Hermes is exercised to keep wall time down; pass `runtime: claude-code` via workflow_dispatch to test it
-- No screenshot/trace capture on failure — add if CI signal is noisy
+1. Bash `trap cleanup_org EXIT INT TERM` in the harness
+2. Playwright `globalTeardown` for the canvas workflow
+3. `if: always()` step in every workflow that greps today's `e2e-*` orgs and force-deletes them
+
+## Exit codes
+
+| Code | Meaning |
+|---|---|
+| 0 | Happy path |
+| 1 | Generic failure (agent didn't respond, provisioning hung, etc.) |
+| 2 | Missing required env |
+| 3 | Provisioning timed out |
+| 4 | Teardown left orphan resources (**leak detected — sanity workflow catches this**) |
+
+## Known gaps (tracked elsewhere)
+
+- [#1369](https://github.com/Molecule-AI/molecule-core/issues/1369): SaaS canvas Files / Terminal / Peers tabs — architecturally broken; whitelisted in the spec
+- LLM-driven delegation (autonomous `delegate_task` tool use) — probabilistic, not in v1; proxy mechanics covered
diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh
index 4e0ace5c..85e5bc8a 100755
--- a/tests/e2e/test_staging_full_saas.sh
+++ b/tests/e2e/test_staging_full_saas.sh
@@ -1,61 +1,50 @@
 #!/usr/bin/env bash
 # Full-lifecycle SaaS E2E against staging.
 #
-# Creates a fresh org per run (unique slug), waits for tenant EC2 + cloudflared
-# provisioning, exercises every major workspace-level API (registration,
-# heartbeat, A2A, delegation, HMA memory, activity, peers, events), then
-# tears the whole org down and asserts that every cloud artefact (EC2, SG,
-# Cloudflare tunnel, DNS record, DB rows) has gone. A leaked resource at
-# teardown is a CI failure — that's the whole point of per-run org
-# provisioning.
+# Creates a fresh org per run (unique slug), waits for tenant EC2 +
+# cloudflared provisioning, exercises every major workspace-level API
+# (register, heartbeat, A2A, delegation, HMA memory, activity, peers),
+# then tears the whole org down and asserts that every cloud artefact
+# (EC2, SG, Cloudflare tunnel, DNS record, DB rows) is gone. A leaked
+# resource at teardown is a CI failure.
+#
+# Auth model:
+#   Single MOLECULE_ADMIN_TOKEN (= CP_ADMIN_API_TOKEN on Railway staging)
+#   drives everything:
+#     - POST /cp/admin/orgs to provision (no WorkOS session scraping)
+#     - GET  /cp/admin/orgs/:slug/admin-token to retrieve the per-tenant
+#       ADMIN_TOKEN once provisioning completes
+#     - DELETE /cp/admin/tenants/:slug for teardown
+#   The per-tenant admin token drives all tenant API calls (workspaces,
+#   memories, a2a).
 #
 # Required env:
-#   MOLECULE_CP_URL                Staging CP base URL (default:
-#                                  https://staging-api.moleculesai.app)
-#   MOLECULE_SESSION_COOKIE        Valid WorkOS session cookie for a test
-#                                  user that's already in the beta
-#                                  allowlist AND has accepted current terms.
-#                                  Extract from browser after signing in to
-#                                  staging. Name: molecule_cp_session.
-#   MOLECULE_ADMIN_TOKEN           CP admin bearer (CP_ADMIN_API_TOKEN on
-#                                  Railway). Used for teardown via
-#                                  DELETE /cp/admin/tenants/:slug and for
-#                                  leak-detection reads.
+#   MOLECULE_CP_URL        default: https://staging-api.moleculesai.app
+#   MOLECULE_ADMIN_TOKEN   CP admin bearer — Railway CP_ADMIN_API_TOKEN
 #
 # Optional env:
-#   E2E_RUNTIME                    Which runtime to test the agent round-trip
-#                                  with. Default: hermes (fastest boot, cheap).
-#                                  Use claude-code when you need to validate
-#                                  that fix.
-#   E2E_PROVISION_TIMEOUT_SECS     How long to wait for the tenant EC2 to
-#                                  come up. Default: 900 (15 min — cold
-#                                  EC2 + cloudflared tunnel + DNS propagation
-#                                  can touch that window).
-#   E2E_KEEP_ORG                   If set to 1, skip teardown. ONLY use
-#                                  locally for debugging — CI must never
-#                                  set this or staging fills with orphans.
-#   E2E_RUN_ID                     Override the auto-generated suffix. CI
-#                                  should pass ${GITHUB_RUN_ID} so the
-#                                  org slug is grep-able in AWS later.
-#   E2E_MODE                       "full" (default) runs every section.
-#                                  "canary" runs a lean variant: one
-#                                  parent workspace, one A2A PONG, then
-#                                  teardown. Used by the 30-min cron
-#                                  workflow so each canary finishes in
-#                                  ~8 min instead of the full ~20.
+#   E2E_RUNTIME                  hermes (default) | claude-code | langgraph
+#   E2E_PROVISION_TIMEOUT_SECS   default 900 (15 min cold EC2 budget)
+#   E2E_KEEP_ORG                 1 → skip teardown (debugging only)
+#   E2E_RUN_ID                   Slug suffix; CI: ${GITHUB_RUN_ID}
+#   E2E_MODE                     full (default) | canary
+#   E2E_INTENTIONAL_FAILURE      1 → poison tenant token mid-run so the
+#                                script fails; the EXIT trap MUST still
+#                                tear down cleanly (and exit 4 on leak).
+#                                Used by a dedicated sanity workflow
+#                                that verifies the safety net.
 #
 # Exit codes:
 #   0  happy path
-#   1  generic failure (see log)
+#   1  generic failure
 #   2  missing required env
 #   3  provisioning timed out
-#   4  cleanup left orphan resources (leak detected)
+#   4  teardown left orphan resources
 
 set -euo pipefail
 
 CP_URL="${MOLECULE_CP_URL:-https://staging-api.moleculesai.app}"
-SESSION_COOKIE="${MOLECULE_SESSION_COOKIE:?MOLECULE_SESSION_COOKIE required — see header for how to obtain}"
-ADMIN_TOKEN="${MOLECULE_ADMIN_TOKEN:?MOLECULE_ADMIN_TOKEN required — from Railway molecule-platform CP env}"
+ADMIN_TOKEN="${MOLECULE_ADMIN_TOKEN:?MOLECULE_ADMIN_TOKEN required — Railway staging CP_ADMIN_API_TOKEN}"
 RUNTIME="${E2E_RUNTIME:-hermes}"
 PROVISION_TIMEOUT_SECS="${E2E_PROVISION_TIMEOUT_SECS:-900}"
 RUN_ID_SUFFIX="${E2E_RUN_ID:-$(date +%H%M%S)-$$}"
@@ -65,13 +54,15 @@ case "$MODE" in
   *) echo "E2E_MODE must be 'full' or 'canary' (got: $MODE)" >&2; exit 2 ;;
 esac
 
-# Slug constraints from orgs.go: ^[a-z][a-z0-9-]{2,31}$.
-# Prefix with "e2e-" so test orgs are grep-able and auto-cleanup crons
-# can target them even when a script crashes before the EXIT trap fires.
-SLUG="e2e-$(date +%Y%m%d)-${RUN_ID_SUFFIX}"
+# Canary runs get a distinct prefix so their safety-net sweeper only
+# touches their own runs, not in-flight full runs.
+if [ "$MODE" = "canary" ]; then
+  SLUG="e2e-canary-$(date +%Y%m%d)-${RUN_ID_SUFFIX}"
+else
+  SLUG="e2e-$(date +%Y%m%d)-${RUN_ID_SUFFIX}"
+fi
 SLUG=$(echo "$SLUG" | tr '[:upper:]' '[:lower:]' | tr -cd 'a-z0-9-' | head -c 32)
 
-# ─── logging helpers ────────────────────────────────────────────────────
 log()  { echo "[$(date +%H:%M:%S)] $*"; }
 fail() { echo "[$(date +%H:%M:%S)] ❌ $*" >&2; exit 1; }
 ok()   { echo "[$(date +%H:%M:%S)] ✅ $*"; }
@@ -79,9 +70,6 @@ ok()   { echo "[$(date +%H:%M:%S)] ✅ $*"; }
 CURL_COMMON=(-sS --fail-with-body --max-time 30)
 
 # ─── cleanup trap ───────────────────────────────────────────────────────
-# Teardown runs on every exit path (success, failure, signal). The
-# delete-tenant endpoint is idempotent — calling it on a slug that was
-# never created returns 404 which we swallow.
 CLEANUP_DONE=0
 cleanup_org() {
   [ "$CLEANUP_DONE" = "1" ] && return 0
@@ -93,7 +81,6 @@ cleanup_org() {
   fi
 
   log "🧹 Tearing down org $SLUG..."
-  # Confirm token must equal slug — defense against accidental teardowns.
   curl "${CURL_COMMON[@]}" -X DELETE "$CP_URL/cp/admin/tenants/$SLUG" \
     -H "Authorization: Bearer $ADMIN_TOKEN" \
     -H "Content-Type: application/json" \
@@ -101,8 +88,6 @@ cleanup_org() {
     && ok "Teardown request accepted" \
     || log "Teardown returned non-2xx (may already be gone)"
 
-  # Leak detection: wait briefly then query CP for any remaining artefacts
-  # tagged with this slug. Anything left = bug in DeprovisionInstance.
   sleep 10
   local leak_count
   leak_count=$(curl "${CURL_COMMON[@]}" "$CP_URL/cp/admin/orgs" \
@@ -125,72 +110,71 @@ log "   Slug:    $SLUG"
 log "   Runtime: $RUNTIME"
 log "   Mode:    $MODE"
 log "   Timeout: ${PROVISION_TIMEOUT_SECS}s"
+[ "${E2E_INTENTIONAL_FAILURE:-0}" = "1" ] && log "   ⚠️  INTENTIONAL_FAILURE=1 — this run MUST fail mid-way; teardown MUST still clean up"
 log "═══════════════════════════════════════════════════════════════════"
 
-log "0/10 Preflight: CP reachable?"
+log "0/11 Preflight: CP reachable?"
 curl "${CURL_COMMON[@]}" "$CP_URL/health" >/dev/null || fail "CP health check failed"
 ok "CP reachable"
 
-# ─── 1. Accept terms (idempotent) ───────────────────────────────────────
-log "1/10 Accepting current terms..."
-curl "${CURL_COMMON[@]}" -X POST "$CP_URL/cp/auth/accept-terms" \
-  -H "Cookie: molecule_cp_session=$SESSION_COOKIE" \
-  -H "Content-Type: application/json" \
-  -d '{}' >/dev/null || log "accept-terms returned non-2xx (may already be accepted)"
-ok "Terms acceptance step complete"
+admin_call() {
+  local method="$1"; shift
+  local path="$1"; shift
+  curl "${CURL_COMMON[@]}" -X "$method" "$CP_URL$path" \
+    -H "Authorization: Bearer $ADMIN_TOKEN" \
+    -H "Content-Type: application/json" \
+    "$@"
+}
 
-# ─── 2. Create org ──────────────────────────────────────────────────────
-log "2/10 Creating org $SLUG..."
-CREATE_RESP=$(curl "${CURL_COMMON[@]}" -X POST "$CP_URL/cp/orgs" \
-  -H "Cookie: molecule_cp_session=$SESSION_COOKIE" \
-  -H "Content-Type: application/json" \
-  -d "{\"slug\":\"$SLUG\",\"name\":\"E2E $SLUG\"}")
+# ─── 1. Create org via admin endpoint ───────────────────────────────────
+log "1/11 Creating org $SLUG via /cp/admin/orgs..."
+CREATE_RESP=$(admin_call POST /cp/admin/orgs \
+  -d "{\"slug\":\"$SLUG\",\"name\":\"E2E $SLUG\",\"owner_user_id\":\"e2e-runner:$SLUG\"}")
 echo "$CREATE_RESP" | python3 -m json.tool >/dev/null || fail "Org create returned non-JSON: $CREATE_RESP"
 ok "Org created"
 
-# ─── 3. Wait for tenant EC2 + cloudflared tunnel + DNS ──────────────────
-log "3/10 Waiting for tenant provisioning (up to ${PROVISION_TIMEOUT_SECS}s)..."
+# ─── 2. Wait for tenant provisioning ────────────────────────────────────
+log "2/11 Waiting for tenant provisioning (up to ${PROVISION_TIMEOUT_SECS}s)..."
 DEADLINE=$(( $(date +%s) + PROVISION_TIMEOUT_SECS ))
 LAST_STATUS=""
 while true; do
   if [ "$(date +%s)" -gt "$DEADLINE" ]; then
     fail "Tenant provisioning timed out after ${PROVISION_TIMEOUT_SECS}s (last: $LAST_STATUS)"
   fi
-  STATUS_JSON=$(curl "${CURL_COMMON[@]}" "$CP_URL/cp/orgs/$SLUG/provision-status" \
-    -H "Cookie: molecule_cp_session=$SESSION_COOKIE" 2>/dev/null || echo '{}')
-  STATUS=$(echo "$STATUS_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('status',''))" 2>/dev/null || echo "")
+  LIST_JSON=$(admin_call GET /cp/admin/orgs 2>/dev/null || echo '{"orgs":[]}')
+  STATUS=$(echo "$LIST_JSON" | python3 -c "
+import json, sys
+d = json.load(sys.stdin)
+for o in d.get('orgs', []):
+    if o.get('slug') == '$SLUG':
+        print(o.get('status', ''))
+        sys.exit(0)
+print('')
+" 2>/dev/null || echo "")
   if [ "$STATUS" != "$LAST_STATUS" ]; then
     log "    status → $STATUS"
     LAST_STATUS="$STATUS"
   fi
   case "$STATUS" in
-    running)        break ;;
-    failed)         fail "Tenant provisioning failed: $(echo "$STATUS_JSON" | python3 -c 'import json,sys; print(json.load(sys.stdin).get("error",""))')" ;;
-    provisioning|awaiting_payment|pending|"") sleep 15 ;;
-    *)              sleep 15 ;;
+    running)  break ;;
+    failed)   fail "Tenant provisioning failed for $SLUG" ;;
+    *)        sleep 15 ;;
   esac
 done
 ok "Tenant provisioning complete"
 
-TENANT_URL=$(echo "$STATUS_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('url') or '')" 2>/dev/null || echo "")
-[ -z "$TENANT_URL" ] && TENANT_URL="https://$SLUG.moleculesai.app"
+TENANT_URL="https://$SLUG.moleculesai.app"
 log "    TENANT_URL=$TENANT_URL"
 
-# Auth strategy for tenant calls: session cookie. The tenant platform's
-# session-auth middleware verifies the cookie against CP via
-# /cp/auth/tenant-member; a session that's a member of the org is
-# treated as admin on that tenant. Same cookie that authed /cp/orgs
-# above, so no separate token plumbing needed -- as long as the test
-# user is auto-added as owner of the freshly-created org (which is the
-# default behaviour of POST /cp/orgs).
-#
-# provision-status does not return org_id or admin_token today; both
-# were an assumption in an earlier draft. X-Molecule-Org-Id is derived
-# server-side from the session membership lookup, so the header is
-# unnecessary.
+# ─── 3. Retrieve per-tenant admin token ────────────────────────────────
+log "3/11 Fetching per-tenant admin token..."
+TENANT_TOKEN_RESP=$(admin_call GET "/cp/admin/orgs/$SLUG/admin-token")
+TENANT_TOKEN=$(echo "$TENANT_TOKEN_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin).get('admin_token',''))" 2>/dev/null || echo "")
+[ -z "$TENANT_TOKEN" ] && fail "Could not retrieve per-tenant admin token for $SLUG"
+ok "Tenant admin token retrieved (len=${#TENANT_TOKEN})"
 
-# ─── 4. Wait for tenant TLS cert to be reachable ───────────────────────
-log "4/10 Waiting for tenant TLS / DNS propagation..."
+# ─── 4. Wait for tenant TLS / DNS propagation ──────────────────────────
+log "4/11 Waiting for tenant TLS / DNS propagation..."
 TLS_DEADLINE=$(( $(date +%s) + 180 ))
 while true; do
   if curl -sSfk --max-time 5 "$TENANT_URL/health" >/dev/null 2>&1; then
@@ -203,38 +187,47 @@ while true; do
 done
 ok "Tenant reachable at $TENANT_URL"
 
+# Sanity-test path: once the tenant is provisioned, poisoning the
+# tenant token proves the EXIT trap + leak assertion still fire.
+# Gate AFTER provisioning so the provision path itself stays valid.
+EFFECTIVE_TENANT_TOKEN="$TENANT_TOKEN"
+if [ "${E2E_INTENTIONAL_FAILURE:-0}" = "1" ]; then
+  log "⚠️  INTENTIONAL_FAILURE: poisoning tenant token for the workspace-provision step"
+  EFFECTIVE_TENANT_TOKEN="poisoned-$$"
+fi
+
 tenant_call() {
   local method="$1"; shift
   local path="$1"; shift
   curl "${CURL_COMMON[@]}" -X "$method" "$TENANT_URL$path" \
-    -H "Cookie: molecule_cp_session=$SESSION_COOKIE" \
+    -H "Authorization: Bearer $EFFECTIVE_TENANT_TOKEN" \
     "$@"
 }
 
-# ─── 5. Provision workspace (parent) ───────────────────────────────────
-log "5/10 Provisioning parent workspace (runtime=$RUNTIME)..."
+# ─── 5. Provision parent workspace ─────────────────────────────────────
+log "5/11 Provisioning parent workspace (runtime=$RUNTIME)..."
 PARENT_RESP=$(tenant_call POST /workspaces \
   -H "Content-Type: application/json" \
   -d "{\"name\":\"E2E Parent\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"gpt-4o\"}")
 PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
 log "    PARENT_ID=$PARENT_ID"
 
-# ─── 6. Provision child (full mode only — for delegation test) ─────────
+# ─── 6. Provision child (full mode only) ────────────────────────────────
 CHILD_ID=""
 if [ "$MODE" = "full" ]; then
-  log "6/10 Provisioning child workspace..."
+  log "6/11 Provisioning child workspace..."
   CHILD_RESP=$(tenant_call POST /workspaces \
     -H "Content-Type: application/json" \
     -d "{\"name\":\"E2E Child\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"gpt-4o\",\"parent_id\":\"$PARENT_ID\"}")
   CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
   log "    CHILD_ID=$CHILD_ID"
 else
-  log "6/10 Canary mode — skipping child workspace (full mode only)"
+  log "6/11 Canary mode — skipping child workspace"
 fi
 
 # ─── 7. Wait for workspace(s) online ───────────────────────────────────
-log "7/10 Waiting for workspace(s) to reach status=online..."
-WS_DEADLINE=$(( $(date +%s) + 600 ))  # 10 min
+log "7/11 Waiting for workspace(s) to reach status=online..."
+WS_DEADLINE=$(( $(date +%s) + 600 ))
 WS_TO_CHECK="$PARENT_ID"
 [ -n "$CHILD_ID" ] && WS_TO_CHECK="$WS_TO_CHECK $CHILD_ID"
 for wid in $WS_TO_CHECK; do
@@ -254,7 +247,7 @@ for wid in $WS_TO_CHECK; do
 done
 
 # ─── 8. A2A round-trip on parent ───────────────────────────────────────
-log "8/10 Sending A2A message to parent — expecting an agent response..."
+log "8/11 Sending A2A message to parent — expecting agent response..."
 A2A_PAYLOAD=$(python3 -c "
 import json, uuid
 print(json.dumps({
@@ -287,9 +280,9 @@ if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
 fi
 ok "A2A parent round-trip succeeded: \"${AGENT_TEXT:0:80}\""
 
-# ─── 9. HMA memory + peers + activity (full mode only) ────────────────
+# ─── 9. HMA + peers + activity (full mode) ─────────────────────────────
 if [ "$MODE" = "full" ]; then
-  log "9/10 Writing + reading HMA memory on parent..."
+  log "9/11 Writing + reading HMA memory on parent..."
   MEM_PAYLOAD=$(python3 -c "
 import json
 print(json.dumps({
@@ -311,10 +304,8 @@ print(json.dumps({
   tenant_call GET "/registry/$PARENT_ID/peers" -o /dev/null -w "%{http_code}\n" 2>&1 | head -1 > /tmp/peers_code.txt
   set -e
   PEERS_CODE=$(cat /tmp/peers_code.txt)
-  if [ "$PEERS_CODE" = "404" ]; then
-    fail "Peers endpoint missing (404) — route regression"
-  fi
-  ok "Peers endpoint reachable (HTTP $PEERS_CODE — 401 expected without ws token)"
+  [ "$PEERS_CODE" = "404" ] && fail "Peers endpoint missing (404) — route regression"
+  ok "Peers endpoint reachable (HTTP $PEERS_CODE)"
 
   ACTIVITY=$(tenant_call GET "/activity?workspace_id=$PARENT_ID&limit=5" 2>/dev/null || echo '[]')
   ACTIVITY_COUNT=$(echo "$ACTIVITY" | python3 -c "import json,sys
@@ -322,19 +313,12 @@ d=json.load(sys.stdin)
 print(len(d if isinstance(d, list) else d.get('events', [])))" 2>/dev/null || echo 0)
   log "    Activity events observed: $ACTIVITY_COUNT"
 else
-  log "9/10 Canary mode — skipping HMA / peers / activity (full mode only)"
+  log "9/11 Canary mode — skipping HMA / peers / activity"
 fi
 
-# ─── 10. Delegation mechanics (full mode + child exists) ──────────────
-# Verifies the proxy path that delegate_task uses under the hood:
-# parent → /workspaces/$CHILD_ID/a2a (X-Source-Workspace-Id: parent) →
-# child runtime → response routes back. Does NOT depend on LLM compliance
-# (the parent agent's tool-use behaviour is tested separately via
-# canvas-driven prompts). If the proxy mechanics are broken, no amount
-# of prompt-engineering on the parent will land a delegation; this
-# section pins the mechanics regression.
+# ─── 10. Delegation mechanics (full mode + child) ──────────────────────
 if [ "$MODE" = "full" ] && [ -n "$CHILD_ID" ]; then
-  log "10/11 Delegation mechanics: parent → child via /workspaces/:id/a2a proxy"
+  log "10/11 Delegation mechanics: parent → child via proxy"
   DELEG_PAYLOAD=$(python3 -c "
 import json, uuid
 print(json.dumps({
@@ -352,15 +336,13 @@ print(json.dumps({
 ")
   set +e
   DELEG_RESP=$(curl "${CURL_COMMON[@]}" -X POST "$TENANT_URL/workspaces/$CHILD_ID/a2a" \
-    -H "Cookie: molecule_cp_session=$SESSION_COOKIE" \
+    -H "Authorization: Bearer $EFFECTIVE_TENANT_TOKEN" \
     -H "X-Source-Workspace-Id: $PARENT_ID" \
     -H "Content-Type: application/json" \
     -d "$DELEG_PAYLOAD")
   DELEG_RC=$?
   set -e
-  if [ $DELEG_RC -ne 0 ]; then
-    fail "Delegation A2A POST failed (rc=$DELEG_RC)"
-  fi
+  [ $DELEG_RC -ne 0 ] && fail "Delegation A2A POST failed (rc=$DELEG_RC)"
   DELEG_TEXT=$(echo "$DELEG_RESP" | python3 -c "
 import json, sys
 try:
@@ -370,22 +352,17 @@ try:
 except Exception:
     print('')
 " 2>/dev/null || echo "")
-  if [ -z "$DELEG_TEXT" ]; then
-    fail "Delegation returned no text. Raw: ${DELEG_RESP:0:200}"
-  fi
+  [ -z "$DELEG_TEXT" ] && fail "Delegation returned no text. Raw: ${DELEG_RESP:0:200}"
   ok "Delegation proxy works (child responded: \"${DELEG_TEXT:0:60}\")"
 
-  # Verify activity log on child captured the delegation. The source
-  # workspace id is logged by the a2a_proxy when X-Source-Workspace-Id
-  # is present on the inbound request.
   CHILD_ACT=$(tenant_call GET "/activity?workspace_id=$CHILD_ID&limit=20" 2>/dev/null || echo '[]')
   if echo "$CHILD_ACT" | grep -q "$PARENT_ID"; then
     ok "Child activity log records parent as source"
   else
-    log "Child activity log did not reference parent (activity pipeline may be async — soft warning only)"
+    log "Child activity log did not reference parent (pipeline may be async)"
   fi
 fi
 
-# ─── 11. Cleanup runs via trap ────────────────────────────────────────
+# ─── 11. Teardown runs via trap ────────────────────────────────────────
 log "11/11 All checks passed. Teardown runs via EXIT trap."
 ok "═══ STAGING $MODE-SAAS E2E PASSED ═══"