diff --git a/.github/workflows/canary-staging.yml b/.github/workflows/canary-staging.yml
new file mode 100644
index 00000000..8036b855
--- /dev/null
+++ b/.github/workflows/canary-staging.yml
@@ -0,0 +1,154 @@
+name: Canary — staging SaaS smoke (every 30 min)
+
+# Minimum viable health check: provisions one Hermes workspace on a fresh
+# staging org, sends one A2A message, verifies PONG, tears down. ~8 min
+# wall clock. Pages on failure by opening a GitHub issue; auto-closes the
+# issue on the next green run.
+#
+# The full-SaaS workflow (e2e-staging-saas.yml) covers the broader surface
+# but runs only on provisioning-critical pushes + nightly — this one
+# catches drift in the 30-min window between those runs (AMI health, CF
+# cert rotation, WorkOS session stability, etc.).
+#
+# Lean mode: E2E_MODE=canary skips the child workspace + HMA memory +
+# peers/activity checks. One parent workspace + one A2A turn is enough
+# to signal "SaaS stack end-to-end is alive."
+
+on:
+  schedule:
+    # Every 30 min. Cron on GitHub-hosted runners has a known drift of
+    # a few minutes under load — that's fine for a canary.
+    - cron: '*/30 * * * *'
+  workflow_dispatch:
+
+# Serialise canary runs against each other so overlapping canaries don't
+# contend for the same org-create quota on staging. Deliberately a
+# different group key from e2e-staging-saas: we don't want a canary
+# queueing behind one ~20-min full run, but two canaries SHOULD queue
+# against each other.
+concurrency:
+  group: canary-staging
+  cancel-in-progress: false
+
+permissions:
+  # Needed to open / close the alerting issue.
+  issues: write
+  contents: read
+
+jobs:
+  canary:
+    name: Canary smoke
+    runs-on: ubuntu-latest
+    timeout-minutes: 15
+
+    env:
+      MOLECULE_CP_URL: https://staging-api.moleculesai.app
+      MOLECULE_SESSION_COOKIE: ${{ secrets.MOLECULE_STAGING_SESSION_COOKIE }}
+      MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
+      E2E_MODE: canary
+      E2E_RUNTIME: hermes
+      E2E_RUN_ID: "canary-${{ github.run_id }}"
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Verify required secrets
+        run: |
+          if [ -z "$MOLECULE_SESSION_COOKIE" ] || [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
+            echo "::error::Canary secrets missing — set MOLECULE_STAGING_SESSION_COOKIE and MOLECULE_STAGING_ADMIN_TOKEN"
+            exit 2
+          fi
+
+      - name: Canary run
+        id: canary
+        run: bash tests/e2e/test_staging_full_saas.sh
+
+      # Alerting: open an issue on first failure, auto-close on recovery.
+      # Title includes a stable marker so multiple consecutive failures
+      # don't spam — they just add comments to the existing issue.
+      - name: Open issue on failure
+        if: failure()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const title = '🔴 Canary failing: staging SaaS smoke';
+            const runURL = `https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`;
+            const body =
+              `Canary run failed at ${new Date().toISOString()}.\n\n` +
+              `Run: ${runURL}\n\n` +
+              `This issue auto-closes on the next green canary run. ` +
+              `Consecutive failures add a comment here rather than a new issue.`;
+
+            // Find an existing open canary issue (stable title match).
+            const { data: existing } = await github.rest.issues.listForRepo({
+              owner: context.repo.owner, repo: context.repo.repo,
+              state: 'open', labels: 'canary-staging',
+              per_page: 10,
+            });
+            const match = existing.find(i => i.title === title);
+
+            if (match) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner, repo: context.repo.repo,
+                issue_number: match.number,
+                body: `Canary still failing. ${runURL}`,
+              });
+              core.info(`Commented on existing issue #${match.number}`);
+            } else {
+              await github.rest.issues.create({
+                owner: context.repo.owner, repo: context.repo.repo,
+                title, body,
+                labels: ['canary-staging', 'bug'],
+              });
+              core.info('Opened new canary failure issue');
+            }
+
+      - name: Auto-close canary issue on success
+        if: success()
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const title = '🔴 Canary failing: staging SaaS smoke';
+            const { data: open } = await github.rest.issues.listForRepo({
+              owner: context.repo.owner, repo: context.repo.repo,
+              state: 'open', labels: 'canary-staging',
+              per_page: 10,
+            });
+            const match = open.find(i => i.title === title);
+            if (match) {
+              await github.rest.issues.createComment({
+                owner: context.repo.owner, repo: context.repo.repo,
+                issue_number: match.number,
+                body: `Canary recovered at ${new Date().toISOString()}. Closing.`,
+              });
+              await github.rest.issues.update({
+                owner: context.repo.owner, repo: context.repo.repo,
+                issue_number: match.number,
+                state: 'closed',
+              });
+              core.info(`Closed recovered canary issue #${match.number}`);
+            }
+
+      - name: Teardown safety net
+        if: always()
+        env:
+          ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
+        run: |
+          set +e
+          orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
+            -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
+            | python3 -c "
+          import json, sys
+          d = json.load(sys.stdin)
+          today = __import__('datetime').date.today().strftime('%Y%m%d')
+          candidates = [o['slug'] for o in d.get('orgs', [])
+                        if o.get('slug','').startswith(f'e2e-{today}-canary-')
+                        and o.get('status') not in ('purged',)]
+          print('\n'.join(candidates))
+          " 2>/dev/null)
+          for slug in $orgs; do
+            curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
+              -H "Authorization: Bearer $ADMIN_TOKEN" \
+              -H "Content-Type: application/json" \
+              -d "{\"confirm_token\":\"$slug\"}" >/dev/null || true
+          done
+          exit 0
diff --git a/.github/workflows/e2e-staging-canvas.yml b/.github/workflows/e2e-staging-canvas.yml
new file mode 100644
index 00000000..e5347fec
--- /dev/null
+++ b/.github/workflows/e2e-staging-canvas.yml
@@ -0,0 +1,117 @@
+name: E2E Staging Canvas (Playwright)
+
+# Playwright test suite that provisions a fresh staging org per run and
+# verifies every workspace-panel tab renders without crashing. Complements
+# e2e-staging-saas.yml (which tests the API shape) by exercising the
+# actual browser + canvas bundle against live staging.
+#
+# Triggers: push to main or PR touching canvas sources + this workflow,
+# manual dispatch, and weekly cron to catch browser/runtime drift even
+# when canvas is quiet.
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - 'canvas/**'
+      - '.github/workflows/e2e-staging-canvas.yml'
+  pull_request:
+    branches: [main]
+    paths:
+      - 'canvas/**'
+      - '.github/workflows/e2e-staging-canvas.yml'
+  workflow_dispatch:
+  schedule:
+    # Weekly on Sunday 08:00 UTC — catches Chrome / Playwright / Next.js
+    # release-note-shaped regressions that don't ride in with a PR.
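+    # (Field order: minute hour day-of-month month day-of-week.)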
+    - cron: '0 8 * * 0'
+
+concurrency:
+  group: e2e-staging-canvas
+  cancel-in-progress: false
+
+jobs:
+  playwright:
+    name: Canvas tabs E2E
+    runs-on: ubuntu-latest
+    timeout-minutes: 40
+
+    env:
+      CANVAS_E2E_STAGING: '1'
+      MOLECULE_CP_URL: https://staging-api.moleculesai.app
+      MOLECULE_SESSION_COOKIE: ${{ secrets.MOLECULE_STAGING_SESSION_COOKIE }}
+      MOLECULE_ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
+
+    defaults:
+      run:
+        working-directory: canvas
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Verify required secrets
+        run: |
+          if [ -z "$MOLECULE_SESSION_COOKIE" ] || [ -z "$MOLECULE_ADMIN_TOKEN" ]; then
+            echo "::error::Missing MOLECULE_STAGING_SESSION_COOKIE or MOLECULE_STAGING_ADMIN_TOKEN"
+            exit 2
+          fi
+
+      - name: Set up Node
+        uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+          cache: 'npm'
+          cache-dependency-path: canvas/package-lock.json
+
+      - name: Install canvas deps
+        run: npm ci
+
+      - name: Install Playwright browsers
+        run: npx playwright install --with-deps chromium
+
+      - name: Run staging canvas E2E
+        run: npx playwright test --config=playwright.staging.config.ts
+
+      - name: Upload Playwright report on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: playwright-report-staging
+          path: canvas/playwright-report-staging/
+          retention-days: 14
+
+      - name: Upload screenshots on failure
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: playwright-screenshots
+          path: canvas/test-results/
+          retention-days: 14
+
+      # Safety-net teardown mirrors the bash-harness workflow — if
+      # globalTeardown didn't run (worker crash, runner cancel), this
+      # step sweeps any e2e-canvas-* org tagged with today's date.
+      - name: Teardown safety net
+        if: always()
+        env:
+          ADMIN_TOKEN: ${{ secrets.MOLECULE_STAGING_ADMIN_TOKEN }}
+        run: |
+          set +e
+          orgs=$(curl -sS "$MOLECULE_CP_URL/cp/admin/orgs" \
+            -H "Authorization: Bearer $ADMIN_TOKEN" 2>/dev/null \
+            | python3 -c "
+          import json, sys
+          d = json.load(sys.stdin)
+          today = __import__('datetime').date.today().strftime('%Y%m%d')
+          candidates = [o['slug'] for o in d.get('orgs', [])
+                        if o.get('slug','').startswith(f'e2e-canvas-{today}-')
+                        and o.get('status') not in ('purged',)]
+          print('\n'.join(candidates))
+          " 2>/dev/null)
+          for slug in $orgs; do
+            curl -sS -X DELETE "$MOLECULE_CP_URL/cp/admin/tenants/$slug" \
+              -H "Authorization: Bearer $ADMIN_TOKEN" \
+              -H "Content-Type: application/json" \
+              -d "{\"confirm_token\":\"$slug\"}" >/dev/null || true
+          done
+          exit 0
diff --git a/canvas/e2e/staging-setup.ts b/canvas/e2e/staging-setup.ts
new file mode 100644
index 00000000..1850a426
--- /dev/null
+++ b/canvas/e2e/staging-setup.ts
@@ -0,0 +1,218 @@
+/**
+ * Playwright global setup for the staging canvas E2E.
+ *
+ * Provisions a fresh staging org per test run (via POST /cp/orgs against
+ * staging CP), waits for the tenant EC2 + cloudflared tunnel + TLS
+ * propagation, provisions one hermes workspace on the new tenant, waits
+ * for it to reach status=online, then exports:
+ *
+ *   STAGING_TENANT_URL   — https://<slug>.moleculesai.app
+ *   STAGING_WORKSPACE_ID — UUID of the provisioned hermes workspace
+ *   STAGING_SLUG         — org slug (for teardown)
+ *
+ * staging-teardown.ts consumes STAGING_SLUG to DELETE the org.
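+ * The same values are also written to .playwright-staging-state.json so
+ * teardown can recover the slug even when env doesn't survive the
+ * process boundary.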
+ *
+ * Required env (set via GH Actions secrets in the workflow):
+ *   MOLECULE_CP_URL          default: https://staging-api.moleculesai.app
+ *   MOLECULE_SESSION_COOKIE  WorkOS session for the staging test user
+ *   MOLECULE_ADMIN_TOKEN     CP admin bearer for teardown (unused in setup
+ *                            but checked here so both halves fail fast)
+ *
+ * Runs only when CANVAS_E2E_STAGING=1 so local `pnpm playwright test` in
+ * dev doesn't try to provision against staging by accident.
+ */
+
+import type { FullConfig } from "@playwright/test";
+import { writeFileSync } from "fs";
+import { join } from "path";
+
+const CP_URL = process.env.MOLECULE_CP_URL || "https://staging-api.moleculesai.app";
+const SESSION = process.env.MOLECULE_SESSION_COOKIE;
+const ADMIN_TOKEN = process.env.MOLECULE_ADMIN_TOKEN;
+const STAGING = process.env.CANVAS_E2E_STAGING === "1";
+
+const PROVISION_TIMEOUT_MS = 15 * 60 * 1000; // 15 min cold-boot budget
+const WORKSPACE_ONLINE_TIMEOUT_MS = 10 * 60 * 1000;
+const TLS_TIMEOUT_MS = 3 * 60 * 1000;
+
+async function jsonFetch(
+  url: string,
+  init: RequestInit = {},
+): Promise<{ status: number; body: any }> {
+  const res = await fetch(url, {
+    ...init,
+    headers: {
+      "Content-Type": "application/json",
+      ...(init.headers || {}),
+    },
+  });
+  let body: any = null;
+  try {
+    body = await res.json();
+  } catch {
+    /* non-JSON */
+  }
+  return { status: res.status, body };
+}
+
+async function waitFor<T>(
+  op: () => Promise<T | null>,
+  deadlineMs: number,
+  intervalMs: number,
+  desc: string,
+): Promise<T> {
+  const deadline = Date.now() + deadlineMs;
+  while (Date.now() < deadline) {
+    const v = await op();
+    if (v !== null) return v;
+    await new Promise((r) => setTimeout(r, intervalMs));
+  }
+  throw new Error(`${desc}: timed out after ${Math.round(deadlineMs / 1000)}s`);
+}
+
+function makeSlug(): string {
+  // Matches CP's ^[a-z][a-z0-9-]{2,31}$. The "e2e-" prefix lets auto-cleanup
+  // crons grep-find leftovers from crashed runs.
+  const y = new Date().toISOString().slice(0, 10).replace(/-/g, "");
+  const rand = Math.random().toString(36).slice(2, 8);
+  return `e2e-canvas-${y}-${rand}`.slice(0, 32);
+}
+
+export default async function globalSetup(_config: FullConfig): Promise<void> {
+  if (!STAGING) {
+    console.log("[staging-setup] CANVAS_E2E_STAGING not set, skipping");
+    return;
+  }
+
+  if (!SESSION) {
+    throw new Error("MOLECULE_SESSION_COOKIE required for staging E2E");
+  }
+  if (!ADMIN_TOKEN) {
+    throw new Error(
+      "MOLECULE_ADMIN_TOKEN required for staging E2E (teardown needs it)",
+    );
+  }
+
+  const slug = makeSlug();
+  const cookieHeader = `molecule_cp_session=${SESSION}`;
+  console.log(`[staging-setup] Using slug=${slug}`);
+
+  // 1. Accept terms (idempotent — already-accepted returns 2xx or 400)
+  await jsonFetch(`${CP_URL}/cp/auth/accept-terms`, {
+    method: "POST",
+    headers: { Cookie: cookieHeader },
+    body: JSON.stringify({}),
+  }).catch(() => {
+    /* best-effort */
+  });
+
+  // 2. Create org
+  const create = await jsonFetch(`${CP_URL}/cp/orgs`, {
+    method: "POST",
+    headers: { Cookie: cookieHeader },
+    body: JSON.stringify({ slug, name: `E2E Canvas ${slug}` }),
+  });
+  if (create.status >= 400) {
+    throw new Error(
+      `POST /cp/orgs returned ${create.status}: ${JSON.stringify(create.body)}`,
+    );
+  }
+  console.log(`[staging-setup] Org created: ${slug}`);
+
+  // 3. Wait for tenant provision (status=running)
+  const finalStatus = await waitFor<{ url?: string; status: string }>(
+    async () => {
+      const r = await jsonFetch(
+        `${CP_URL}/cp/orgs/${slug}/provision-status`,
+        { headers: { Cookie: cookieHeader } },
+      );
+      if (r.status !== 200) return null;
+      if (r.body?.status === "running") return r.body;
+      if (r.body?.status === "failed") {
+        throw new Error(`Provisioning failed: ${JSON.stringify(r.body)}`);
+      }
+      return null;
+    },
+    PROVISION_TIMEOUT_MS,
+    15_000,
+    "tenant provision",
+  );
+
+  const tenantURL = finalStatus.url || `https://${slug}.moleculesai.app`;
+  console.log(`[staging-setup] Tenant URL: ${tenantURL}`);
+
+  // 4. Wait for tenant TLS readiness
+  await waitFor(
+    async () => {
+      try {
+        const res = await fetch(`${tenantURL}/health`, {
+          signal: AbortSignal.timeout(5000),
+        });
+        return res.ok ? true : null;
+      } catch {
+        return null;
+      }
+    },
+    TLS_TIMEOUT_MS,
+    5_000,
+    "tenant TLS",
+  );
+
+  // 5. Provision one hermes workspace (cheapest, fastest-booting)
+  const ws = await jsonFetch(`${tenantURL}/workspaces`, {
+    method: "POST",
+    headers: { Cookie: cookieHeader },
+    body: JSON.stringify({
+      name: "E2E Canvas Test",
+      runtime: "hermes",
+      tier: 2,
+      model: "gpt-4o",
+    }),
+  });
+  if (ws.status >= 400 || !ws.body?.id) {
+    throw new Error(
+      `Workspace create failed (${ws.status}): ${JSON.stringify(ws.body)}`,
+    );
+  }
+  const workspaceId = ws.body.id as string;
+  console.log(`[staging-setup] Workspace created: ${workspaceId}`);
+
+  // 6. Wait for workspace online
+  await waitFor(
+    async () => {
+      const r = await jsonFetch(`${tenantURL}/workspaces/${workspaceId}`, {
+        headers: { Cookie: cookieHeader },
+      });
+      if (r.status !== 200) return null;
+      if (r.body?.status === "online") return true;
+      if (r.body?.status === "failed") {
+        throw new Error(
+          `Workspace ${workspaceId} failed: ${r.body.last_sample_error || ""}`,
+        );
+      }
+      return null;
+    },
+    WORKSPACE_ONLINE_TIMEOUT_MS,
+    10_000,
+    "workspace online",
+  );
+  console.log(`[staging-setup] Workspace online`);
+
+  // 7. Export via a state file so staging-teardown and the test spec can
+  //    pick up the same slug / urls. Playwright's global setup can't
+  //    export env to the test subprocess directly in all configurations.
+  const stateFile = join(process.cwd(), ".playwright-staging-state.json");
+  writeFileSync(
+    stateFile,
+    JSON.stringify({ slug, tenantURL, workspaceId }, null, 2),
+  );
+  // Also set env for in-process test reads.
+  process.env.STAGING_SLUG = slug;
+  process.env.STAGING_TENANT_URL = tenantURL;
+  process.env.STAGING_WORKSPACE_ID = workspaceId;
+  process.env.STAGING_SESSION_COOKIE = SESSION;
+
+  console.log(`[staging-setup] Ready — ${stateFile}`);
+}
diff --git a/canvas/e2e/staging-tabs.spec.ts b/canvas/e2e/staging-tabs.spec.ts
new file mode 100644
index 00000000..6e8b5d9c
--- /dev/null
+++ b/canvas/e2e/staging-tabs.spec.ts
@@ -0,0 +1,162 @@
+/**
+ * Staging canvas E2E — opens each of the 13 workspace-panel tabs against a
+ * fresh staging org provisioned in the global setup. Asserts each tab
+ * renders without throwing and captures a screenshot for visual review.
+ *
+ * Relies on `staging-setup.ts` to provision a tenant org, provision one
+ * hermes workspace on it, and hand us a tenant URL + workspace id via
+ * env (set by the setup file before tests run). Global teardown tears
+ * down the org.
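+ * If any of those exports are missing, the test fails fast with a
+ * pointer back at global setup rather than timing out against a
+ * half-provisioned tenant.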
+ *
+ * Runs only when CANVAS_E2E_STAGING=1 — tests are skipped in local dev
+ * where the prerequisite env isn't set.
+ */
+
+import { test, expect } from "@playwright/test";
+
+// Tab ids as declared in canvas/src/components/SidePanel.tsx TABS.
+// Kept duplicated here (not imported) because Playwright tests run outside
+// the Next.js bundler and can't import from @/components paths.
+const TAB_IDS = [
+  "chat",
+  "activity",
+  "details",
+  "skills",
+  "terminal",
+  "config",
+  "schedule",
+  "channels",
+  "files",
+  "memory",
+  "traces",
+  "events",
+  "audit",
+] as const;
+
+const STAGING = process.env.CANVAS_E2E_STAGING === "1";
+
+test.skip(!STAGING, "CANVAS_E2E_STAGING not set — skipping staging-only tests");
+
+test.describe("staging canvas tabs", () => {
+  test("each workspace-panel tab renders without error", async ({
+    page,
+    context,
+  }) => {
+    const tenantURL = process.env.STAGING_TENANT_URL;
+    const sessionCookie = process.env.STAGING_SESSION_COOKIE;
+    const workspaceId = process.env.STAGING_WORKSPACE_ID;
+
+    if (!tenantURL || !sessionCookie || !workspaceId) {
+      throw new Error(
+        "staging-setup.ts did not export STAGING_TENANT_URL / STAGING_SESSION_COOKIE / STAGING_WORKSPACE_ID — did global setup run?",
+      );
+    }
+
+    // The session cookie was minted by CP at sign-in; canvas on the tenant
+    // subdomain shares it via the parent-domain scope (.moleculesai.app).
+    // Playwright needs both the cookie and the cross-domain visibility.
+    const url = new URL(tenantURL);
+    await context.addCookies([
+      {
+        name: "molecule_cp_session",
+        value: sessionCookie,
+        // Leading dot → valid on all subdomains. The staging WorkOS auth
+        // flow sets it this way, so we mirror.
+        domain: "." + url.hostname.replace(/^[^.]+\./, ""),
+        path: "/",
+        httpOnly: true,
+        secure: true,
+        sameSite: "Lax",
+      },
+    ]);
+
+    const consoleErrors: string[] = [];
+    page.on("console", (msg) => {
+      if (msg.type() === "error") {
+        consoleErrors.push(msg.text());
+      }
+    });
+
+    await page.goto(tenantURL, { waitUntil: "networkidle" });
+
+    // Canvas hydration races WebSocket connect + /workspaces fetch. Wait
+    // for either the tablist to mount or the hydration-error banner —
+    // whichever wins first.
+    await page.waitForSelector('[role="tablist"], [data-testid="hydration-error"]', {
+      timeout: 45_000,
+    });
+
+    const hydrationErr = await page
+      .locator('[data-testid="hydration-error"]')
+      .count();
+    expect(
+      hydrationErr,
+      "canvas hydration failed — check staging CP + tenant reachability",
+    ).toBe(0);
+
+    // Click the workspace node to open the side panel. We match by the
+    // data-workspace-id attribute rather than the node's accessible name
+    // (the workspace display name) to avoid coupling to naming.
+    const node = page.locator(`[data-workspace-id="${workspaceId}"]`).first();
+    // Fallback: click by role if the data attribute isn't wired
+    if ((await node.count()) === 0) {
+      // Try clicking the first workspace card visible
+      const firstNode = page.locator('[role="button"][aria-label*="Workspace"]').first();
+      await firstNode.click({ timeout: 10_000 });
+    } else {
+      await node.click({ timeout: 10_000 });
+    }
+
+    // Wait for the side panel tablist to mount
+    await page.waitForSelector('[role="tablist"]', { timeout: 15_000 });
+
+    for (const tabId of TAB_IDS) {
+      await test.step(`tab: ${tabId}`, async () => {
+        const tabButton = page.locator(`#tab-${tabId}`);
+        await expect(
+          tabButton,
+          `tab-${tabId} button missing — TABS list may have drifted`,
+        ).toBeVisible({ timeout: 5_000 });
+        await tabButton.click();
+
+        const panel = page.locator(`#panel-${tabId}`);
+        await expect(
+          panel,
+          `panel for ${tabId} never rendered`,
+        ).toBeVisible({ timeout: 10_000 });
+
+        // No toast-style error banner should appear for a healthy workspace.
+        // Known exceptions: terminal may 4xx on SaaS cross-EC2 (WS target
+        // unreachable), peers may 401 without workspace token. Those are
+        // reported separately in issue #1369; here we just guard against
+        // hard crashes (an [role="alert"] toast saying "Failed to load").
+        const errorToasts = await page
+          .locator('[role="alert"]:has-text("Failed to load")')
+          .count();
+        expect(
+          errorToasts,
+          `tab ${tabId}: saw "Failed to load" toast`,
+        ).toBe(0);
+
+        await page.screenshot({
+          path: `test-results/staging-tab-${tabId}.png`,
+          fullPage: false,
+        });
+      });
+    }
+
+    // Aggregate console-error check. Filter out known-noisy Sentry/Vercel
+    // analytics chatter that doesn't reflect app health; anything left
+    // fails the test.
+    const appErrors = consoleErrors.filter(
+      (msg) =>
+        !msg.includes("sentry") &&
+        !msg.includes("vercel") &&
+        !msg.includes("WebSocket") && // WS failures ≠ app failures
+        !msg.includes("favicon"),
+    );
+    expect(
+      appErrors,
+      `unexpected console errors:\n${appErrors.join("\n")}`,
+    ).toHaveLength(0);
+  });
+});
diff --git a/canvas/e2e/staging-teardown.ts b/canvas/e2e/staging-teardown.ts
new file mode 100644
index 00000000..f6e79f92
--- /dev/null
+++ b/canvas/e2e/staging-teardown.ts
@@ -0,0 +1,66 @@
+/**
+ * Playwright global teardown — deletes the staging org provisioned by
+ * staging-setup.ts via DELETE /cp/admin/tenants/:slug. Runs on success AND
+ * failure (Playwright calls globalTeardown regardless).
+ *
+ * This is the primary cleanup; the workflow's always()-step safety net,
+ * which sweeps any leftover e2e-canvas-* org tagged with today's date,
+ * is the belt-and-braces backup.
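+ *
+ * The DELETE uses the same confirm_token contract as the admin sweep:
+ * the request body must echo the slug being deleted.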
+ */
+
+import { existsSync, readFileSync, unlinkSync } from "fs";
+import { join } from "path";
+
+const CP_URL = process.env.MOLECULE_CP_URL || "https://staging-api.moleculesai.app";
+const ADMIN_TOKEN = process.env.MOLECULE_ADMIN_TOKEN;
+const STAGING = process.env.CANVAS_E2E_STAGING === "1";
+
+export default async function globalTeardown(): Promise<void> {
+  if (!STAGING) return;
+  if (!ADMIN_TOKEN) {
+    console.warn("[staging-teardown] no MOLECULE_ADMIN_TOKEN, skipping");
+    return;
+  }
+
+  const stateFile = join(process.cwd(), ".playwright-staging-state.json");
+  if (!existsSync(stateFile)) {
+    console.warn("[staging-teardown] no state file — setup must have failed before org create; nothing to tear down");
+    return;
+  }
+
+  let slug: string;
+  try {
+    const state = JSON.parse(readFileSync(stateFile, "utf-8"));
+    slug = state.slug;
+  } catch (e) {
+    console.warn(`[staging-teardown] state file unreadable: ${e}`);
+    return;
+  }
+
+  console.log(`[staging-teardown] Deleting org ${slug}...`);
+  try {
+    const res = await fetch(`${CP_URL}/cp/admin/tenants/${slug}`, {
+      method: "DELETE",
+      headers: {
+        Authorization: `Bearer ${ADMIN_TOKEN}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({ confirm_token: slug }),
+    });
+    if (res.ok) {
+      console.log(`[staging-teardown] ${slug} deleted`);
+    } else {
+      console.warn(
+        `[staging-teardown] DELETE returned ${res.status} (may already be gone)`,
+      );
+    }
+  } catch (e) {
+    console.warn(`[staging-teardown] DELETE failed: ${e}`);
+  }
+
+  try {
+    unlinkSync(stateFile);
+  } catch {
+    /* non-fatal */
+  }
+}
diff --git a/canvas/playwright.staging.config.ts b/canvas/playwright.staging.config.ts
new file mode 100644
index 00000000..62dec331
--- /dev/null
+++ b/canvas/playwright.staging.config.ts
@@ -0,0 +1,50 @@
+/**
+ * Playwright config for staging canvas E2E.
+ *
+ * Separate from playwright.config.ts (local dev) so:
+ *   - globalSetup / globalTeardown don't run for every local `pnpm test`
+ *   - Retries + timeouts can be longer (staging is remote + shared)
+ *   - the target URL is dynamic (globalSetup exports STAGING_TENANT_URL;
+ *     specs navigate to it instead of relying on a static baseURL)
+ *
+ * Invoked by the e2e-staging-canvas GH Actions workflow:
+ *   npx playwright test --config=playwright.staging.config.ts
+ */
+
+import { defineConfig } from "@playwright/test";
+
+export default defineConfig({
+  testDir: "./e2e",
+  // Only the staging-*.spec.ts files run under this config. The smoke +
+  // unit specs (chat-separation, filestab-smoke, etc.) stay on the local
+  // config so they don't hit staging.
+  testMatch: /staging-.*\.spec\.ts/,
+  // Per-test budget. Provisioning happens in globalSetup, which has its
+  // own 10–15 min deadlines (EC2 boot is ~5 min and can drift to 10+ on
+  // cold AMI days); the tests themselves only click through a live
+  // canvas, but staging round-trips are slow.
+  timeout: 120_000,
+  expect: { timeout: 15_000 },
+  fullyParallel: false,
+  // A transient network blip shouldn't cost us the whole run. Two retries
+  // mean up to 3 attempts — staging flakes fall within that budget.
+  retries: 2,
+  // One worker: the setup provisions exactly one org/workspace, and
+  // parallel specs would fight over the shared workspace selector state.
+  workers: 1,
+  globalSetup: "./e2e/staging-setup.ts",
+  globalTeardown: "./e2e/staging-teardown.ts",
+  use: {
+    // STAGING_TENANT_URL gets written to process.env in global setup, but
+    // Playwright resolves baseURL before setup runs. We read it inside
+    // each spec instead — don't hard-code here.
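+    //   e.g. await page.goto(process.env.STAGING_TENANT_URL!)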
+    headless: true,
+    screenshot: "only-on-failure",
+    video: "retain-on-failure",
+    trace: "retain-on-failure",
+    navigationTimeout: 45_000,
+    actionTimeout: 15_000,
+  },
+  reporter: [
+    ["list"],
+    ["html", { outputFolder: "playwright-report-staging", open: "never" }],
+  ],
+  projects: [{ name: "chromium", use: { browserName: "chromium" } }],
+});
diff --git a/tests/e2e/test_staging_full_saas.sh b/tests/e2e/test_staging_full_saas.sh
index f3427ec2..4e0ace5c 100755
--- a/tests/e2e/test_staging_full_saas.sh
+++ b/tests/e2e/test_staging_full_saas.sh
@@ -37,6 +37,12 @@
 #   E2E_RUN_ID        Override the auto-generated suffix. CI
 #                     should pass ${GITHUB_RUN_ID} so the
 #                     org slug is grep-able in AWS later.
+#   E2E_MODE          "full" (default) runs every section.
+#                     "canary" runs a lean variant: one
+#                     parent workspace, one A2A PONG, then
+#                     teardown. Used by the 30-min cron
+#                     workflow so each canary finishes in
+#                     ~8 min instead of the full ~20.
 #
 # Exit codes:
 #   0  happy path
@@ -53,6 +59,11 @@ ADMIN_TOKEN="${MOLECULE_ADMIN_TOKEN:?MOLECULE_ADMIN_TOKEN required — from Rail
 RUNTIME="${E2E_RUNTIME:-hermes}"
 PROVISION_TIMEOUT_SECS="${E2E_PROVISION_TIMEOUT_SECS:-900}"
 RUN_ID_SUFFIX="${E2E_RUN_ID:-$(date +%H%M%S)-$$}"
+MODE="${E2E_MODE:-full}"
+case "$MODE" in
+  full|canary) ;;
+  *) echo "E2E_MODE must be 'full' or 'canary' (got: $MODE)" >&2; exit 2 ;;
+esac
 
 # Slug constraints from orgs.go: ^[a-z][a-z0-9-]{2,31}$.
 # Prefix with "e2e-" so test orgs are grep-able and auto-cleanup crons
@@ -112,6 +123,7 @@
 log "  Staging full-SaaS E2E"
 log "  CP:      $CP_URL"
 log "  Slug:    $SLUG"
 log "  Runtime: $RUNTIME"
+log "  Mode:    $MODE"
 log "  Timeout: ${PROVISION_TIMEOUT_SECS}s"
 log "═══════════════════════════════════════════════════════════════════"
@@ -160,17 +172,22 @@
 done
 ok "Tenant provisioning complete"
 
-TENANT_URL=$(echo "$STATUS_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('tenant_url') or d.get('url') or '')" 2>/dev/null || echo "")
+TENANT_URL=$(echo "$STATUS_JSON" | python3 -c "import json,sys; d=json.load(sys.stdin); print(d.get('url') or '')" 2>/dev/null || echo "")
 [ -z "$TENANT_URL" ] && TENANT_URL="https://$SLUG.moleculesai.app"
 log "  TENANT_URL=$TENANT_URL"
 
-# Tenant admin token — returned by provision-status for the
-# just-provisioned org so the test can call tenant admin endpoints
-# (POST /workspaces etc.) without depending on a workspace auth token.
-TENANT_ADMIN_TOKEN=$(echo "$STATUS_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('admin_token',''))" 2>/dev/null || echo "")
-[ -z "$TENANT_ADMIN_TOKEN" ] && fail "provision-status did not return admin_token"
-
-ORG_ID=$(echo "$STATUS_JSON" | python3 -c "import json,sys; print(json.load(sys.stdin).get('org_id',''))" 2>/dev/null || echo "")
+# Auth strategy for tenant calls: session cookie. The tenant platform's
+# session-auth middleware verifies the cookie against CP via
+# /cp/auth/tenant-member; a session that's a member of the org is
+# treated as admin on that tenant. Same cookie that authed /cp/orgs
+# above, so no separate token plumbing is needed — as long as the test
+# user is auto-added as owner of the freshly-created org (which is the
+# default behaviour of POST /cp/orgs).
+#
+# provision-status does not return org_id or admin_token today; both
+# were an assumption in an earlier draft. X-Molecule-Org-Id is derived
+# server-side from the session membership lookup, so the header is
+# unnecessary.
 
 # ─── 4. Wait for tenant TLS cert to be reachable ───────────────────────
 log "4/10 Waiting for tenant TLS / DNS propagation..."
@@ -190,8 +207,7 @@
 tenant_call() {
   local method="$1"; shift
   local path="$1"; shift
   curl "${CURL_COMMON[@]}" -X "$method" "$TENANT_URL$path" \
-    -H "Authorization: Bearer $TENANT_ADMIN_TOKEN" \
-    -H "X-Molecule-Org-Id: $ORG_ID" \
+    -H "Cookie: molecule_cp_session=$SESSION_COOKIE" \
     "$@"
 }
@@ -203,18 +219,25 @@
 PARENT_RESP=$(tenant_call POST /workspaces \
   -H "Content-Type: application/json" \
   -d "{\"name\":\"E2E Parent\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"gpt-4o\"}")
 PARENT_ID=$(echo "$PARENT_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
 log "  PARENT_ID=$PARENT_ID"
 
-# ─── 6. Provision child (for delegation test) ──────────────────────────
-log "6/10 Provisioning child workspace..."
-CHILD_RESP=$(tenant_call POST /workspaces \
-  -H "Content-Type: application/json" \
-  -d "{\"name\":\"E2E Child\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"gpt-4o\",\"parent_id\":\"$PARENT_ID\"}")
-CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
-log "  CHILD_ID=$CHILD_ID"
+# ─── 6. Provision child (full mode only — for delegation test) ─────────
+CHILD_ID=""
+if [ "$MODE" = "full" ]; then
+  log "6/10 Provisioning child workspace..."
+  CHILD_RESP=$(tenant_call POST /workspaces \
+    -H "Content-Type: application/json" \
+    -d "{\"name\":\"E2E Child\",\"runtime\":\"$RUNTIME\",\"tier\":2,\"model\":\"gpt-4o\",\"parent_id\":\"$PARENT_ID\"}")
+  CHILD_ID=$(echo "$CHILD_RESP" | python3 -c "import json,sys; print(json.load(sys.stdin)['id'])")
+  log "  CHILD_ID=$CHILD_ID"
+else
+  log "6/10 Canary mode — skipping child workspace (full mode only)"
+fi
 
-# ─── 7. Wait for both online ───────────────────────────────────────────
-log "7/10 Waiting for both workspaces to reach status=online..."
+# ─── 7. Wait for workspace(s) online ───────────────────────────────────
+log "7/10 Waiting for workspace(s) to reach status=online..."
 WS_DEADLINE=$(( $(date +%s) + 600 ))  # 10 min
-for wid in "$PARENT_ID" "$CHILD_ID"; do
+WS_TO_CHECK="$PARENT_ID"
+[ -n "$CHILD_ID" ] && WS_TO_CHECK="$WS_TO_CHECK $CHILD_ID"
+for wid in $WS_TO_CHECK; do
   while true; do
     if [ "$(date +%s)" -gt "$WS_DEADLINE" ]; then
       fail "Workspace $wid never reached online within 10 min"
@@ -264,44 +287,105 @@ if echo "$AGENT_TEXT" | grep -qiE "error|exception"; then
 fi
 ok "A2A parent round-trip succeeded: \"${AGENT_TEXT:0:80}\""
 
-# ─── 9. HMA memory write/read ──────────────────────────────────────────
-log "9/10 Writing + reading HMA memory on parent..."
-MEM_PAYLOAD=$(python3 -c "
+# ─── 9. HMA memory + peers + activity (full mode only) ────────────────
+if [ "$MODE" = "full" ]; then
+  log "9/10 Writing + reading HMA memory on parent..."
+  MEM_PAYLOAD=$(python3 -c "
 import json
 print(json.dumps({
   'content': 'E2E memory seed — run $SLUG',
   'scope': 'LOCAL'
 }))
 ")
-tenant_call POST "/workspaces/$PARENT_ID/memories" \
-  -H "Content-Type: application/json" \
-  -d "$MEM_PAYLOAD" >/dev/null || fail "memory POST failed"
-# Read back and confirm presence
-MEM_LIST=$(tenant_call GET "/workspaces/$PARENT_ID/memories?scope=LOCAL")
-if ! echo "$MEM_LIST" | grep -q "run $SLUG"; then
-  fail "HMA memory not readable after write. List: ${MEM_LIST:0:200}"
-fi
-ok "HMA memory write+read roundtripped"
+  tenant_call POST "/workspaces/$PARENT_ID/memories" \
+    -H "Content-Type: application/json" \
+    -d "$MEM_PAYLOAD" >/dev/null || fail "memory POST failed"
+  MEM_LIST=$(tenant_call GET "/workspaces/$PARENT_ID/memories?scope=LOCAL")
+  if ! echo "$MEM_LIST" | grep -q "run $SLUG"; then
echo "$MEM_LIST" | grep -q "run $SLUG"; then + fail "HMA memory not readable after write. List: ${MEM_LIST:0:200}" + fi + ok "HMA memory write+read roundtripped" -# ─── 9b. Peers + activity smoke ──────────────────────────────────────── -log "9b. Peer discovery + activity log smoke..." -# Peers (uses workspace bearer — we don't have one here, so expect 401 and -# just verify the endpoint responds at all rather than 404). -set +e -tenant_call GET "/registry/$PARENT_ID/peers" -o /dev/null -w "%{http_code}\n" 2>&1 | head -1 > /tmp/peers_code.txt -set -e -PEERS_CODE=$(cat /tmp/peers_code.txt) -if [ "$PEERS_CODE" = "404" ]; then - fail "Peers endpoint missing (404) — route regression" -fi -ok "Peers endpoint reachable (HTTP $PEERS_CODE — 401 expected without ws token)" + log "9b. Peer discovery + activity log smoke..." + set +e + tenant_call GET "/registry/$PARENT_ID/peers" -o /dev/null -w "%{http_code}\n" 2>&1 | head -1 > /tmp/peers_code.txt + set -e + PEERS_CODE=$(cat /tmp/peers_code.txt) + if [ "$PEERS_CODE" = "404" ]; then + fail "Peers endpoint missing (404) — route regression" + fi + ok "Peers endpoint reachable (HTTP $PEERS_CODE — 401 expected without ws token)" -ACTIVITY=$(tenant_call GET "/activity?workspace_id=$PARENT_ID&limit=5" 2>/dev/null || echo '[]') -ACTIVITY_COUNT=$(echo "$ACTIVITY" | python3 -c "import json,sys + ACTIVITY=$(tenant_call GET "/activity?workspace_id=$PARENT_ID&limit=5" 2>/dev/null || echo '[]') + ACTIVITY_COUNT=$(echo "$ACTIVITY" | python3 -c "import json,sys d=json.load(sys.stdin) print(len(d if isinstance(d, list) else d.get('events', [])))" 2>/dev/null || echo 0) -log " Activity events observed: $ACTIVITY_COUNT" + log " Activity events observed: $ACTIVITY_COUNT" +else + log "9/10 Canary mode — skipping HMA / peers / activity (full mode only)" +fi -# ─── 10. Cleanup runs via trap ──────────────────────────────────────── -log "10/10 All checks passed. Teardown runs via EXIT trap." -ok "═══ STAGING FULL-SAAS E2E PASSED ═══" +# ─── 10. Delegation mechanics (full mode + child exists) ────────────── +# Verifies the proxy path that delegate_task uses under the hood: +# parent → /workspaces/$CHILD_ID/a2a (X-Source-Workspace-Id: parent) → +# child runtime → response routes back. Does NOT depend on LLM compliance +# (the parent agent's tool-use behaviour is tested separately via +# canvas-driven prompts). If the proxy mechanics are broken, no amount +# of prompt-engineering on the parent will land a delegation; this +# section pins the mechanics regression. +if [ "$MODE" = "full" ] && [ -n "$CHILD_ID" ]; then + log "10/11 Delegation mechanics: parent → child via /workspaces/:id/a2a proxy" + DELEG_PAYLOAD=$(python3 -c " +import json, uuid +print(json.dumps({ + 'jsonrpc': '2.0', + 'method': 'message/send', + 'id': 'e2e-deleg-1', + 'params': { + 'message': { + 'role': 'user', + 'messageId': f'e2e-deleg-{uuid.uuid4().hex[:8]}', + 'parts': [{'kind': 'text', 'text': 'Reply with exactly: CHILD_PONG'}] + } + } +})) +") + set +e + DELEG_RESP=$(curl "${CURL_COMMON[@]}" -X POST "$TENANT_URL/workspaces/$CHILD_ID/a2a" \ + -H "Cookie: molecule_cp_session=$SESSION_COOKIE" \ + -H "X-Source-Workspace-Id: $PARENT_ID" \ + -H "Content-Type: application/json" \ + -d "$DELEG_PAYLOAD") + DELEG_RC=$? 
+  set -e
+  if [ "$DELEG_RC" -ne 0 ]; then
+    fail "Delegation A2A POST failed (rc=$DELEG_RC)"
+  fi
+  DELEG_TEXT=$(echo "$DELEG_RESP" | python3 -c "
+import json, sys
+try:
+    d = json.load(sys.stdin)
+    parts = d.get('result', {}).get('parts', [])
+    print(parts[0].get('text', '') if parts else '')
+except Exception:
+    print('')
+" 2>/dev/null || echo "")
+  if [ -z "$DELEG_TEXT" ]; then
+    fail "Delegation returned no text. Raw: ${DELEG_RESP:0:200}"
+  fi
+  ok "Delegation proxy works (child responded: \"${DELEG_TEXT:0:60}\")"
+
+  # Verify the activity log on the child captured the delegation. The
+  # source workspace id is logged by the a2a_proxy when
+  # X-Source-Workspace-Id is present on the inbound request.
+  CHILD_ACT=$(tenant_call GET "/activity?workspace_id=$CHILD_ID&limit=20" 2>/dev/null || echo '[]')
+  if echo "$CHILD_ACT" | grep -q "$PARENT_ID"; then
+    ok "Child activity log records parent as source"
+  else
+    log "Child activity log did not reference parent (activity pipeline may be async — soft warning only)"
+  fi
+fi
 
 # ─── 10. Cleanup runs via trap ─────────────────────────────────────────
 log "10/10 All checks passed. Teardown runs via EXIT trap."
-ok "═══ STAGING FULL-SAAS E2E PASSED ═══"
+ok "═══ STAGING $MODE-SAAS E2E PASSED ═══"