canvas/e2e: surface admin-orgs row + workspace body on failure
Two diagnostic upgrades to the Playwright staging-setup harness. Neither
changes when the harness throws; only the thrown error messages carry more detail:
1. The provision-failed throw now includes the full admin-orgs row (boot
stage, last error, terraform/SSM state, etc.) instead of just the
slug. Every "provision failed: <slug>" in CI history was followed
by a manual repro to find out WHY — that round trip is no longer needed.
2. The workspace-failed throw now dumps the full /workspaces/{id} body when
last_sample_error is empty. Boot crashes, image-pull errors,
missing PYTHONPATH, and OpenAI-quota-at-startup all surfaced as a
bare "Workspace failed:" until now (see #2632). They now carry whatever
boot_stage / image / last_error fields the API response body exposes.
No fix for the underlying flakes — those are tracked in #2632 (CP race)
and #2578 (OpenAI quota). This just stops them looking identical in the
CI log.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
18e88e7039
commit
9d45211fd3
@ -169,7 +169,17 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
|||||||
orgID = row.id;
|
orgID = row.id;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (row.instance_status === "failed") throw new Error(`provision failed: ${slug}`);
|
if (row.instance_status === "failed") {
|
||||||
|
// Dump every diagnostic field the admin row carries — boot stage,
|
||||||
|
// last error, terraform/SSM state, etc. The bare slug message used
|
||||||
|
// to surface ZERO context, so triaging a failed provision meant
|
||||||
|
// re-running locally to repro. Now the failure log carries enough
|
||||||
|
// to point at the right subsystem (CP/AWS/SSM/runtime) without a
|
||||||
|
// second round-trip.
|
||||||
|
throw new Error(
|
||||||
|
`provision failed: ${slug} — admin-orgs row: ${JSON.stringify(row)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
},
|
},
|
||||||
PROVISION_TIMEOUT_MS,
|
PROVISION_TIMEOUT_MS,
|
||||||
@ -249,7 +259,17 @@ export default async function globalSetup(_config: FullConfig): Promise<void> {
|
|||||||
if (r.status !== 200) return null;
|
if (r.status !== 200) return null;
|
||||||
if (r.body?.status === "online") return true;
|
if (r.body?.status === "online") return true;
|
||||||
if (r.body?.status === "failed") {
|
if (r.body?.status === "failed") {
|
||||||
throw new Error(`Workspace failed: ${r.body.last_sample_error || ""}`);
|
// last_sample_error is often empty when the failure happens before
|
||||||
|
// the agent emits a sample (e.g. boot crash, image pull error,
|
||||||
|
// missing PYTHONPATH, OpenAI quota at startup). Dumping the full
|
||||||
|
// body gives triage the boot_stage / last_error / image fields it
|
||||||
|
// needs without a second probe. Otherwise this propagates as a
|
||||||
|
// bare "Workspace failed: " — the exact useless message that
|
||||||
|
// sent #2632 to the issue tracker.
|
||||||
|
const detail = r.body.last_sample_error
|
||||||
|
? r.body.last_sample_error
|
||||||
|
: `(no last_sample_error) full body: ${JSON.stringify(r.body)}`;
|
||||||
|
throw new Error(`Workspace failed: ${detail}`);
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
},
|
},
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user