From b80816a3b018608016e1ebb9374a3a14b594d20b Mon Sep 17 00:00:00 2001 From: Molecule Core DevOps Date: Wed, 3 Jun 2026 21:05:52 -0700 Subject: [PATCH] fix(e2e): canvas-tabs staging setup waits for RENDERABLE, not online (#2199) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E2E Staging Canvas (Playwright) / "Canvas tabs E2E" went red on main HEAD b9d2f023. The actual failure (runner-6 task 258160) is in the Playwright globalSetup, NOT in any spec assertion: [staging-setup] Workspace created: 8e5c7354-... Error: Workspace failed: (no last_sample_error) full body: {... "runtime":"hermes","status":"failed","uptime_seconds":0, "last_sample_error":null ...} at canvas/e2e/staging-setup.ts:272 (waitFor "workspace online") Root cause — NOT a canvas/test regression and NOT timing fragility. It is a deterministic consequence of workspace-server #2162 (merged 2026-06-03, "platform-managed workspace must fail-closed when CP proxy env absent"), which is a correct production safety fix. The canvas E2E creates a bare hermes/gpt-4o workspace that defaults closed to platform_managed; on a staging tenant without MOLECULE_LLM_BASE_URL / MOLECULE_LLM_USAGE_TOKEN, the agent now aborts at boot with MISSING_PLATFORM_PROXY — surfacing as the pre-start credential-abort shape (status:"failed", uptime_seconds:0, no last_sample_error). Pre-#2162 the same workspace booted credential-less (the bug #2162 fixed) so the old harness happened to pass. The fix is in the harness, because this test does not need a booted agent: staging-tabs.spec.ts only opens the 13 side-panel tabs and asserts no hard crash / no "Failed to load" toast. It makes zero LLM calls and even mocks /cp/auth/me + 401→200. All it needs is a workspace ROW so the node + tabs render. So step 6 now waits for RENDERABLE instead of strictly online: - online -> happy path (staging with proxy env) - failed + uptime_seconds==0 + no sample -> pre-start credential-abort: agent never ran, row still renders -> proceed, with a loud console.warn - any other failed (last_sample_error present, OR uptime_seconds>0 i.e. the agent started then crashed) -> still hard-throws (no masking) Real infra/provision failure stays loud one step earlier at the org level (instance_status === "failed", unchanged). Verification: tsc clean for canvas/e2e/staging-* (pre-existing tsc errors are all in unrelated __tests__ files); `playwright test --list` resolves globalSetup + the single spec. Full live run needs staging CP creds not available locally; the changed branch is the globalSetup readiness gate, verified by inspection against the captured failing-run body. Co-Authored-By: Claude Opus 4.8 (1M context) --- canvas/e2e/staging-setup.ts | 57 ++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/canvas/e2e/staging-setup.ts b/canvas/e2e/staging-setup.ts index 873ac07bd..88b007fd7 100644 --- a/canvas/e2e/staging-setup.ts +++ b/canvas/e2e/staging-setup.ts @@ -250,7 +250,38 @@ export default async function globalSetup(_config: FullConfig): Promise { const workspaceId = ws.body.id as string; console.log(`[staging-setup] Workspace created: ${workspaceId}`); - // 6. Wait for workspace online + // 6. Wait for workspace RENDERABLE. + // + // This harness exists to verify the canvas *tab UI* renders (staging- + // tabs.spec.ts: open each of the 13 workspace-panel tabs, assert no hard + // crash / no "Failed to load" toast). It does NOT exercise the agent — + // no LLM call is made, the spec even mocks /cp/auth/me and 401→200. All + // it needs is a workspace ROW that the canvas lists so the node renders + // and the side-panel tabs open. A fully-`online` agent is NOT required. + // + // That distinction became load-bearing on 2026-06-03: workspace-server + // #2162 (fix(provision): platform-managed workspace must fail-closed when + // CP proxy env absent) made a platform_managed workspace ABORT AT BOOT + // with MISSING_PLATFORM_PROXY when MOLECULE_LLM_BASE_URL / + // MOLECULE_LLM_USAGE_TOKEN are not present in the tenant's env. The + // canvas E2E creates a bare hermes/gpt-4o workspace, which defaults + // closed to platform_managed (workspace_provision.go:~1009), and the + // staging tenant does not carry the CP proxy env — so the agent never + // starts. Pre-#2162 this same workspace booted credential-less (the bug + // #2162 fixed) and the tabs rendered fine; #2162 is a correct production + // safety fix, but it surfaced here as `status:"failed", uptime_seconds:0, + // last_sample_error:null` — the pre-start credential-abort shape — and the + // old hard-throw turned a UI-irrelevant boot skip into a main-red + // (core#2199). The agent boot stage is simply not what this test gates. + // + // So: online is the happy path. A `failed` row that is the PRE-START + // credential-abort shape (the agent process never ran: uptime_seconds==0 + // AND no last_sample_error) is treated as RENDERABLE — the row exists, + // the node + tabs render, proceed. We do NOT mask a real boot regression: + // any `failed` carrying a last_sample_error, OR a non-zero uptime (the + // agent started then crashed — image pull, panic, PYTHONPATH, etc.), + // still hard-throws. Genuine *infra* provision failure is already caught + // loud one step earlier at the org level (instance_status === "failed"). await waitFor( async () => { const r = await jsonFetch(`${tenantURL}/workspaces/${workspaceId}`, { @@ -259,6 +290,24 @@ export default async function globalSetup(_config: FullConfig): Promise { if (r.status !== 200) return null; if (r.body?.status === "online") return true; if (r.body?.status === "failed") { + const uptime = Number(r.body?.uptime_seconds ?? 0); + const sampleErr = r.body?.last_sample_error; + const preStartCredentialAbort = uptime === 0 && !sampleErr; + if (preStartCredentialAbort) { + // Agent never started (no LLM cred on this staging tenant — the + // expected #2162 platform-proxy gap). The workspace row still + // renders, which is all the tab-UI test needs. Proceed, but log + // loudly so a real "agent never booted because of something else" + // is not silently normalized. + console.warn( + `[staging-setup] workspace ${workspaceId} is 'failed' with the pre-start ` + + `credential-abort shape (uptime_seconds=0, no last_sample_error) — agent did ` + + `not boot (expected on staging without CP LLM proxy env, post workspace-server ` + + `#2162). The tab-UI test does not exercise the agent; proceeding with the ` + + `workspace row, which renders regardless. full body: ${JSON.stringify(r.body)}`, + ); + return true; + } // last_sample_error is often empty when the failure happens before // the agent emits a sample (e.g. boot crash, image pull error, // missing PYTHONPATH, OpenAI quota at startup). Dumping the full @@ -266,8 +315,8 @@ export default async function globalSetup(_config: FullConfig): Promise { // needs without a second probe. Otherwise this propagates as a // bare "Workspace failed: " — the exact useless message that // sent #2632 to the issue tracker. - const detail = r.body.last_sample_error - ? r.body.last_sample_error + const detail = sampleErr + ? sampleErr : `(no last_sample_error) full body: ${JSON.stringify(r.body)}`; throw new Error(`Workspace failed: ${detail}`); } @@ -277,7 +326,7 @@ export default async function globalSetup(_config: FullConfig): Promise { 10_000, "workspace online", ); - console.log(`[staging-setup] Workspace online`); + console.log(`[staging-setup] Workspace renderable`); // 7. Hand state off to tests + teardown — overwrite the slug-only // bootstrap state with the full state spec tests need. -- 2.52.0