refactor(canvas): extract runtime profiles to @/lib/runtimeProfiles

Preparation for a "hundreds of runtimes" plugin ecosystem. Keeping the
runtime-specific UX knobs in-line inside ProvisioningTimeout scales badly
— every new runtime would require editing a component, not just adding a
table entry. Other components (create-workspace dialog, workspace card
tooltips, etc.) will want the same runtime metadata.

Changes:

- New file `canvas/src/lib/runtimeProfiles.ts` owns:
  * `RuntimeProfile` type — structural shape; every field is optional so
    new runtimes can partially fill it without breaking consumers.
  * `DEFAULT_RUNTIME_PROFILE` — 2-min default floor (docker-fast).
  * `RUNTIME_PROFILES` — named overrides (currently: hermes 12 min).
  * `WorkspaceRuntimeOverrides` — interface for server-provided
    per-workspace overrides, so operators can tune via template
    manifest / workspace metadata without a canvas release.
  * `getRuntimeProfile()` — resolver with
    overrides → profile → default priority.
  * `provisionTimeoutForRuntime()` — convenience wrapper.

- `ProvisioningTimeout.tsx` now delegates to the profile module.
  `DEFAULT_PROVISION_TIMEOUT_MS` re-exported for legacy test importers.

- Tests: 16/16 (up from 9 before the first fix). Adds pinning for:
  * overrides > profile > default priority chain
  * "every entry in RUNTIME_PROFILES resolves to a number" contract
  * backward-compat export

Adding a new slow runtime is now one table entry in
`canvas/src/lib/runtimeProfiles.ts` with a mandatory `WHY` comment.
Moving to server-driven profiles later is a ~10-line change (the
resolver already threads WorkspaceRuntimeOverrides through).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hongming Wang 2026-04-24 11:48:39 -07:00
parent 9597d262ca
commit 0b237ed9dd
3 changed files with 225 additions and 67 deletions

View File

@ -6,38 +6,16 @@ import { api } from "@/lib/api";
import { showToast } from "./Toaster";
import { ConsoleModal } from "./ConsoleModal";
/**
 * Default provisioning timeout: 2 minutes, expressed in milliseconds.
 *
 * This is the floor for fast runtimes on Docker (claude-code, langgraph,
 * crewai), whose cold boot takes 30-90s. Slower runtimes raise it through
 * RUNTIME_TIMEOUT_OVERRIDES_MS, and the effective threshold additionally
 * scales with the concurrent-provisioning count (see effectiveTimeoutMs
 * below).
 */
export const DEFAULT_PROVISION_TIMEOUT_MS = 120000;
import {
DEFAULT_RUNTIME_PROFILE,
provisionTimeoutForRuntime,
} from "@/lib/runtimeProfiles";
/**
 * Timeout floors, keyed by runtime name, for runtimes whose cold-boot
 * sequence legitimately runs past the 2-minute default.
 *
 * A threshold that is too low produces false-alarm banners telling users
 * "your workspace is stuck" while it is actually mid-install — confusing,
 * and it makes users retry workspaces that would have come online on
 * their own.
 *
 * hermes (12 min): installs ripgrep + ffmpeg + node22, builds
 * hermes-agent from source, and pulls Playwright + Chromium (~300MB).
 * Measured boots on staging EC2 routinely land at 8-13 min. The value
 * aligns with the SaaS E2E PROVISION_TIMEOUT_SECS=900 (15 min) so the UI
 * warning lands shortly before the backend itself gives up.
 *
 * Add an entry when a new runtime surfaces false-alarm complaints. Any
 * runtime absent from this map gets DEFAULT_PROVISION_TIMEOUT_MS.
 */
export const RUNTIME_TIMEOUT_OVERRIDES_MS: Record<string, number> = {
  // 12 min — rationale in the doc comment on this map.
  hermes: 12 * 60 * 1000,
};
/**
 * Resolve the base provisioning timeout for a workspace from its runtime.
 *
 * @param runtime - Runtime name of the workspace; may be undefined or empty.
 * @returns The runtime's own entry in RUNTIME_TIMEOUT_OVERRIDES_MS when it
 *   has one, otherwise DEFAULT_PROVISION_TIMEOUT_MS.
 */
export function timeoutForRuntime(runtime: string | undefined): number {
  // Own-property check instead of `in`: `in` also matches names inherited
  // from Object.prototype ("constructor", "toString", ...), which would
  // hand back a function instead of a number for a runtime so named.
  const hasOverride =
    !!runtime &&
    Object.prototype.hasOwnProperty.call(RUNTIME_TIMEOUT_OVERRIDES_MS, runtime);
  const override = hasOverride
    ? RUNTIME_TIMEOUT_OVERRIDES_MS[runtime]
    : undefined;
  return override ?? DEFAULT_PROVISION_TIMEOUT_MS;
}
/**
 * Backward-compatibility alias. Tests and other importers previously
 * imported DEFAULT_PROVISION_TIMEOUT_MS from this file, so the name stays
 * exported here. New code should read the value via getRuntimeProfile()
 * from @/lib/runtimeProfiles instead.
 */
export const DEFAULT_PROVISION_TIMEOUT_MS: number =
  DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs;
/** The server provisions up to `PROVISION_CONCURRENCY` containers at
* once and paces the rest in a queue (`workspaceCreatePacingMs` =
@ -155,14 +133,15 @@ export function ProvisioningTimeout({
const now = Date.now();
const newTimedOut: TimeoutEntry[] = [];
// Per-node timeout: each workspace has its own base (runtime-aware)
// scaled by the total concurrent-provisioning count. A hermes
// workspace in a batch alongside two langgraph workspaces gets
// hermes's 12-min base, not langgraph's 2-min base.
// Per-node timeout: each workspace resolves its own base via
// @/lib/runtimeProfiles (server-override → runtime profile →
// default), then scales by concurrent-provisioning count. A
// hermes workspace in a batch alongside two langgraph workspaces
// gets hermes's 12-min base, not langgraph's 2-min base.
for (const node of parsedProvisioningNodes) {
const startedAt = tracking.get(node.id);
if (!startedAt) continue;
const base = timeoutMs ?? timeoutForRuntime(node.runtime);
const base = timeoutMs ?? provisionTimeoutForRuntime(node.runtime);
const effective = effectiveTimeoutMs(
base,
parsedProvisioningNodes.length,

View File

@ -7,11 +7,13 @@ global.fetch = vi.fn(() =>
import { useCanvasStore } from "../../store/canvas";
import type { WorkspaceData } from "../../store/socket";
import { DEFAULT_PROVISION_TIMEOUT_MS } from "../ProvisioningTimeout";
import {
DEFAULT_PROVISION_TIMEOUT_MS,
RUNTIME_TIMEOUT_OVERRIDES_MS,
timeoutForRuntime,
} from "../ProvisioningTimeout";
DEFAULT_RUNTIME_PROFILE,
RUNTIME_PROFILES,
getRuntimeProfile,
provisionTimeoutForRuntime,
} from "@/lib/runtimeProfiles";
// Helper to build a WorkspaceData object
function makeWS(overrides: Partial<WorkspaceData> & { id: string }): WorkspaceData {
@ -196,37 +198,94 @@ describe("ProvisioningTimeout", () => {
// the 2-min floor for fast docker runtimes while giving hermes its
// honest 12-min budget.
describe("timeoutForRuntime", () => {
it("returns the 2-min default for unknown/missing runtimes", () => {
expect(timeoutForRuntime(undefined)).toBe(DEFAULT_PROVISION_TIMEOUT_MS);
expect(timeoutForRuntime("")).toBe(DEFAULT_PROVISION_TIMEOUT_MS);
expect(timeoutForRuntime("some-future-runtime")).toBe(
DEFAULT_PROVISION_TIMEOUT_MS,
);
describe("runtime profile resolution (@/lib/runtimeProfiles)", () => {
describe("provisionTimeoutForRuntime", () => {
it("returns the default for unknown/missing runtimes", () => {
expect(provisionTimeoutForRuntime(undefined)).toBe(
DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs,
);
expect(provisionTimeoutForRuntime("")).toBe(
DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs,
);
expect(provisionTimeoutForRuntime("some-future-runtime")).toBe(
DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs,
);
});
it("returns default for known-fast runtimes (not in profile map)", () => {
// If someone ever adds one of these to RUNTIME_PROFILES with a
// slower value, this test catches the unintended regression.
expect(provisionTimeoutForRuntime("claude-code")).toBe(
DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs,
);
expect(provisionTimeoutForRuntime("langgraph")).toBe(
DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs,
);
expect(provisionTimeoutForRuntime("crewai")).toBe(
DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs,
);
});
it("returns hermes override when runtime = hermes", () => {
expect(provisionTimeoutForRuntime("hermes")).toBe(
RUNTIME_PROFILES.hermes?.provisionTimeoutMs,
);
expect(provisionTimeoutForRuntime("hermes")).toBeGreaterThanOrEqual(
DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs * 5,
);
});
it("server-side workspace override wins over runtime profile", () => {
// The resolution order is: overrides → profile → default.
// An operator-tunable per-workspace number on the backend
// (e.g. via a template manifest field) should beat the canvas
// runtime map.
expect(
provisionTimeoutForRuntime("hermes", {
provisionTimeoutMs: 60_000,
}),
).toBe(60_000);
expect(
provisionTimeoutForRuntime("some-unknown", {
provisionTimeoutMs: 300_000,
}),
).toBe(300_000);
});
});
it("returns the docker-fast 2-min default for known-fast runtimes", () => {
// These aren't in the override map so they get the default.
// If someone ever adds one of them to RUNTIME_TIMEOUT_OVERRIDES_MS,
// this test catches the accidental regression.
expect(timeoutForRuntime("claude-code")).toBe(DEFAULT_PROVISION_TIMEOUT_MS);
expect(timeoutForRuntime("langgraph")).toBe(DEFAULT_PROVISION_TIMEOUT_MS);
expect(timeoutForRuntime("crewai")).toBe(DEFAULT_PROVISION_TIMEOUT_MS);
describe("getRuntimeProfile", () => {
it("returns a structural profile with required fields", () => {
const profile = getRuntimeProfile("hermes");
expect(profile.provisionTimeoutMs).toBeTypeOf("number");
expect(profile.provisionTimeoutMs).toBeGreaterThan(0);
});
it("default profile is a valid superset of every override", () => {
// Every entry in RUNTIME_PROFILES must provide fields the
// default does — otherwise consumers could get undefined where
// they expected a number. This test enforces that contract so
// future entries can't accidentally drop fields.
for (const [runtime, profile] of Object.entries(RUNTIME_PROFILES)) {
const resolved = getRuntimeProfile(runtime);
expect(
resolved.provisionTimeoutMs,
`runtime=${runtime} must resolve to a number`,
).toBeTypeOf("number");
expect(resolved.provisionTimeoutMs).toBeGreaterThan(0);
// Profile's explicit value should be used iff present.
if (profile.provisionTimeoutMs !== undefined) {
expect(resolved.provisionTimeoutMs).toBe(profile.provisionTimeoutMs);
}
}
});
});
it("returns 12 min for hermes — covers cold-boot install tail", () => {
expect(timeoutForRuntime("hermes")).toBe(720_000);
expect(timeoutForRuntime("hermes")).toBe(
RUNTIME_TIMEOUT_OVERRIDES_MS.hermes,
);
});
it("hermes override is materially longer than the default", () => {
// Guard against future refactors that accidentally weaken the
// override (e.g. typo lowering hermes to 72_000 = 72s).
expect(RUNTIME_TIMEOUT_OVERRIDES_MS.hermes).toBeGreaterThanOrEqual(
DEFAULT_PROVISION_TIMEOUT_MS * 5,
);
describe("DEFAULT_PROVISION_TIMEOUT_MS backward-compat export", () => {
it("still exports the same default for legacy importers", () => {
expect(DEFAULT_PROVISION_TIMEOUT_MS).toBe(
DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs,
);
});
});
});
});

View File

@ -0,0 +1,120 @@
/**
* Runtime profiles — per-runtime UX metadata.
*
* Scaling target: hundreds of runtimes (plugin-architecture-v2 roadmap).
* This module is the single source of truth for runtime-specific UI knobs
* on the canvas side. Each runtime can declare:
*
* - provisionTimeoutMs: when to show the "taking longer than expected"
* banner. Fast docker runtimes = 2min; slow source-build runtimes = 12min.
* - (future) label, icon, color, helpUrl, capabilities — add as needed.
*
* Resolution order (most specific wins):
*
* 1. Server-provided override on the workspace data (e.g.
* `workspace.data.provisionTimeoutMs` set from a template manifest).
* Lets operators tune without a canvas release once server-side
* declarative config lands.
* 2. Per-runtime entry in RUNTIME_PROFILES.
* 3. DEFAULT_RUNTIME_PROFILE.
*
* Adding a new runtime:
* - If it's fast (≤ 2min cold boot): do nothing, the default catches it.
* - If it's slow: add one entry to RUNTIME_PROFILES below.
* - Long-term: move runtime profiles server-side so this file can shrink.
*
* Architectural note: this deliberately lives under /lib, NOT
* /components/ProvisioningTimeout. Other components (e.g. a
* "create workspace" dialog that needs to know the runtime's expected
* cold-boot time) should import from here too — this avoids duplicating
* the runtime-name knowledge across the codebase.
*/
/**
 * Shape of a runtime profile: the UX knobs that may differ per runtime.
 *
 * Extend it as more knobs become runtime-specific. Keep every field
 * optional, so that a new runtime can fill in only part of the profile
 * and older code that reads just some of the fields keeps working.
 */
export interface RuntimeProfile {
  /**
   * Base delay, in milliseconds, before the canvas shows the
   * "taking too long" banner. This is only the base value — the
   * ProvisioningTimeout component still scales it by the
   * concurrent-provisioning count.
   */
  provisionTimeoutMs?: number;

  // Candidate future fields (left commented out until something uses them):
  //   label?: string;
  //   icon?: string;
  //   color?: string;
  //   helpUrl?: string;
}
/**
 * Baseline profile that every runtime inherits unless it overrides a
 * field. Calibrated for docker-local fast runtimes (claude-code,
 * langgraph, crewai), where cold boot is 30-90s.
 */
export const DEFAULT_RUNTIME_PROFILE: Required<
  Pick<RuntimeProfile, "provisionTimeoutMs">
> = {
  provisionTimeoutMs: 2 * 60 * 1000, // 120_000 ms
};
/**
 * Per-runtime overrides, keyed by runtime name.
 *
 * Keep this map small and explicit — each entry is a deliberate statement
 * that the runtime's cold-boot behavior differs materially from the
 * default.
 *
 * Every override must ship with a comment explaining WHY the default is
 * wrong for that runtime. Unexplained numbers rot.
 */
export const RUNTIME_PROFILES: Record<string, RuntimeProfile> = {
  hermes: {
    // WHY 12 min: hermes installs ripgrep + ffmpeg + node22, builds
    // hermes-agent from source, and downloads Playwright + Chromium
    // (~300MB). Measured cold boots on staging EC2 routinely land at
    // 8-13 min. The value aligns with SaaS E2E's
    // PROVISION_TIMEOUT_SECS=900 (15 min) so the UI warning lands
    // shortly before the backend itself gives up.
    provisionTimeoutMs: 12 * 60 * 1000,
  },
};
/**
 * Per-workspace fields the canvas may consult to override profile values.
 * They allow the backend (via workspace data on the socket payload) to
 * change a profile value without a canvas release.
 *
 * The typing is intentionally loose — when a field is not present on the
 * node, resolution falls through to the runtime profile.
 */
export interface WorkspaceRuntimeOverrides {
  /** Per-workspace replacement for the profile's provisionTimeoutMs. */
  provisionTimeoutMs?: number;
}
/**
 * Resolve the profile for a runtime name, optionally layering
 * server-provided per-workspace overrides on top.
 *
 * Resolution (most specific usable value wins):
 *   1. overrides.provisionTimeoutMs
 *   2. RUNTIME_PROFILES[runtime].provisionTimeoutMs
 *   3. DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs
 *
 * A candidate is usable only when it is a finite number greater than
 * zero. Overrides arrive from the server and are not type-checked at
 * runtime, so a 0, negative, NaN or non-numeric value falls through to
 * the next level instead of producing an instant or never-firing banner.
 *
 * @param runtime - Runtime name of the workspace; may be undefined or empty.
 * @param overrides - Optional per-workspace overrides from the server.
 * @returns A profile whose provisionTimeoutMs is always a positive number.
 */
export function getRuntimeProfile(
  runtime: string | undefined,
  overrides?: WorkspaceRuntimeOverrides,
): Required<Pick<RuntimeProfile, "provisionTimeoutMs">> {
  const isUsableTimeout = (value: unknown): value is number =>
    typeof value === "number" && Number.isFinite(value) && value > 0;

  // Own-property lookup only, so runtime names that collide with
  // Object.prototype members ("constructor", "__proto__", ...) never
  // resolve to an inherited object.
  const profile =
    runtime && Object.prototype.hasOwnProperty.call(RUNTIME_PROFILES, runtime)
      ? RUNTIME_PROFILES[runtime]
      : undefined;

  // Ordered from most to least specific; the first usable value wins.
  const candidates: unknown[] = [
    overrides?.provisionTimeoutMs,
    profile?.provisionTimeoutMs,
  ];
  const resolved = candidates.find(isUsableTimeout);

  return {
    provisionTimeoutMs: resolved ?? DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs,
  };
}
/**
 * Shorthand for callers that only need the timeout: returns the
 * provisionTimeoutMs field of `getRuntimeProfile(runtime, overrides)`.
 *
 * @param runtime - Runtime name of the workspace; may be undefined.
 * @param overrides - Optional per-workspace overrides, passed through.
 * @returns The resolved provisioning timeout in milliseconds.
 */
export function provisionTimeoutForRuntime(
  runtime: string | undefined,
  overrides?: WorkspaceRuntimeOverrides,
): number {
  const { provisionTimeoutMs } = getRuntimeProfile(runtime, overrides);
  return provisionTimeoutMs;
}