feat(canvas): per-workspace provision_timeout_ms override (#2054)
Phase 1 of moving runtime UX knobs server-side. Builds the canvas
foundation: a workspace can carry its own provision_timeout_ms
(sourced server-side from a template manifest in a follow-up PR),
and ProvisioningTimeout's resolver respects it per-node.
Until now the resolver only had a props-level timeoutMs that applied to ALL
nodes — fine for tests but wrong for production, where one batch can mix
runtimes (a hermes 12-min cold boot alongside a 2-min docker boot).
The runtime profile fallback already handles per-runtime defaults;
this PR adds the per-WORKSPACE override layer above that.
Resolution priority (most specific wins):
1. node.provisionTimeoutMs — server-declared per-workspace
override (this PR's new field)
2. timeoutMs prop — single-threshold test override
3. runtime profile in @/lib/runtimeProfiles
4. DEFAULT_RUNTIME_PROFILE
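For illustration, a minimal TypeScript sketch of that resolution order. The
helper and profile names mirror the ones this PR calls into
(@/lib/runtimeProfiles), but the body and the numbers are assumptions made
for the example, not the canvas's actual implementation:

    // Placeholder profile values, loosely based on the runtimes mentioned in
    // this message (hermes ~12 min, docker ~2 min); the real table lives in
    // @/lib/runtimeProfiles.
    const RUNTIME_PROFILES: Record<string, { provisionTimeoutMs: number }> = {
      hermes: { provisionTimeoutMs: 720_000 },
      docker: { provisionTimeoutMs: 120_000 },
    };
    const DEFAULT_RUNTIME_PROFILE = { provisionTimeoutMs: 120_000 };

    function provisionTimeoutForRuntime(
      runtime: string,
      opts?: { provisionTimeoutMs?: number },
    ): number {
      // 1-2. Explicit override: the caller collapses the per-workspace value
      //      and the test-only prop into one field (node.provisionTimeoutMs ?? timeoutMs).
      if (opts?.provisionTimeoutMs != null && Number.isFinite(opts.provisionTimeoutMs)) {
        return opts.provisionTimeoutMs;
      }
      // 3. Per-runtime profile when the runtime is known.
      const profile = RUNTIME_PROFILES[runtime];
      if (profile) return profile.provisionTimeoutMs;
      // 4. Catch-all default.
      return DEFAULT_RUNTIME_PROFILE.provisionTimeoutMs;
    }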
Changes:
- WorkspaceData (socket): add optional provision_timeout_ms
- WorkspaceNodeData: add optional provisionTimeoutMs
- canvas-topology hydrate: thread the field through to node.data
- ProvisioningTimeout: extend the serialized-string node iteration
to carry provisionTimeoutMs (4-field positional split); pass as
the second arg to provisionTimeoutForRuntime
- 3 new tests in ProvisioningTimeout.test.tsx covering hydrate
threading, null fall-through, and resolver priority
Phase 2 (separate PR, blocked on workspace-server template-config
loader): workspace-server reads provision_timeout_seconds from the
template's config.yaml at provision time and includes
provision_timeout_ms in the workspace API/socket response. Phase 3
(template-repo PR): template-hermes config.yaml declares
provision_timeout_seconds: 720; canvas RUNTIME_PROFILES.hermes
becomes redundant and can be removed.
19/19 tests pass (3 new + 16 existing).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
@@ -71,15 +71,19 @@ export function ProvisioningTimeout({
   // Runtime included so the timeout threshold can be resolved per-node
   // (hermes cold-boot legitimately takes 8-13 min vs 30-90s for docker
   // runtimes — a single threshold would false-alarm on one or the other).
+  // provisionTimeoutMs added by #2054 — server-declared per-workspace
+  // override that wins over the runtime profile when present.
   // Separator: `|` between fields, `,` between nodes. Names may contain
   // anything the user typed; strip `|` and `,` so serialization round-trips.
+  // Empty-string sentinels for missing values so split/index stays positional.
   const provisioningNodes = useCanvasStore((s) => {
     const result = s.nodes
       .filter((n) => n.data.status === "provisioning")
       .map((n) => {
         const safeName = (n.data.name ?? "").replace(/[|,]/g, " ");
         const runtime = n.data.runtime ?? "";
-        return `${n.id}|${safeName}|${runtime}`;
+        const provisionTimeoutMs = n.data.provisionTimeoutMs ?? "";
+        return `${n.id}|${safeName}|${runtime}|${provisionTimeoutMs}`;
       });
     return result.join(",");
   });
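For concreteness, here is what the selector's serialized value would look
like for two provisioning nodes, one carrying the server override and one
without (ids, names, and numbers are made up for illustration; only the
four-fields-per-node, comma-joined shape comes from the diff):

    // ws-a carries a 10-minute server override; ws-b's empty fourth field is
    // the sentinel meaning "no override, fall through to the docker profile".
    const serialized = "ws-a|Slow agent|hermes|600000,ws-b|Fast agent|docker|";

Keeping the selector's return a primitive string (rather than an array of
objects) presumably lets the store subscription compare by value, so the
component only recomputes when the provisioning set actually changes.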
@@ -87,8 +91,14 @@ export function ProvisioningTimeout({
     () =>
       provisioningNodes
         ? provisioningNodes.split(",").map((entry) => {
-            const [id, name, runtime] = entry.split("|");
-            return { id, name, runtime };
+            const [id, name, runtime, provisionTimeoutMs] = entry.split("|");
+            const ptms = provisionTimeoutMs ? Number(provisionTimeoutMs) : undefined;
+            return {
+              id,
+              name,
+              runtime,
+              provisionTimeoutMs: Number.isFinite(ptms) ? ptms : undefined,
+            };
          })
        : [],
    [provisioningNodes],
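A side note on the sentinel handling above (a worked illustration, not part
of the diff): the truthiness guard matters because an empty string coerces to
0, which would otherwise read as a zero-millisecond override, and the
Number.isFinite check is a defensive backstop against a malformed fourth
field:

    Number("");       // 0       (why the `provisionTimeoutMs ? ... : undefined` guard exists)
    Number("600000"); // 600000
    Number("abc");    // NaN     (rejected by the Number.isFinite(ptms) check)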
@@ -138,10 +148,19 @@ export function ProvisioningTimeout({
     // default), then scales by concurrent-provisioning count. A
     // hermes workspace in a batch alongside two langgraph workspaces
     // gets hermes's 12-min base, not langgraph's 2-min base.
+    //
+    // Resolution priority (most specific wins):
+    //   1. node.provisionTimeoutMs — server-declared per-workspace
+    //      override (#2054, sourced from template manifest)
+    //   2. timeoutMs prop — single-threshold test override
+    //   3. runtime profile in @/lib/runtimeProfiles
+    //   4. DEFAULT_RUNTIME_PROFILE
     for (const node of parsedProvisioningNodes) {
       const startedAt = tracking.get(node.id);
       if (!startedAt) continue;
-      const base = timeoutMs ?? provisionTimeoutForRuntime(node.runtime);
+      const base = provisionTimeoutForRuntime(node.runtime, {
+        provisionTimeoutMs: node.provisionTimeoutMs ?? timeoutMs,
+      });
       const effective = effectiveTimeoutMs(
         base,
         parsedProvisioningNodes.length,
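Putting the override path together, a worked example of the base resolution
for a mixed batch (values illustrative, using the sketch of
provisionTimeoutForRuntime given after the priority list above; the real
helper lives in @/lib/runtimeProfiles):

    // The hermes node carries a 10-minute server override, so it wins outright;
    // the docker node has none, so its runtime profile supplies the base.
    const hermesBase = provisionTimeoutForRuntime("hermes", { provisionTimeoutMs: 600_000 }); // 600_000
    const dockerBase = provisionTimeoutForRuntime("docker", {});                              // profile value

Each base is then handed to effectiveTimeoutMs together with the batch size,
a step this PR leaves unchanged.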
@@ -287,5 +287,60 @@ describe("ProvisioningTimeout", () => {
       );
     });
   });
+
+  // #2054 — per-workspace server override threading from socket
+  // payload through node-data into ProvisioningTimeout's resolver.
+  // Doesn't render the component; verifies the data path lands the
+  // value where ProvisioningTimeout reads it from.
+  describe("server-side per-workspace override (#2054)", () => {
+    it("hydrate carries provision_timeout_ms onto node.data.provisionTimeoutMs", () => {
+      useCanvasStore.getState().hydrate([
+        makeWS({
+          id: "ws-slow",
+          name: "Slow",
+          status: "provisioning",
+          runtime: "future-runtime",
+          provision_timeout_ms: 600_000,
+        }),
+      ]);
+      const node = useCanvasStore
+        .getState()
+        .nodes.find((n) => n.id === "ws-slow");
+      expect(node?.data.provisionTimeoutMs).toBe(600_000);
+    });
+
+    it("absent provision_timeout_ms hydrates to null (falls through to runtime profile)", () => {
+      useCanvasStore.getState().hydrate([
+        makeWS({ id: "ws-default", name: "Default", status: "provisioning", runtime: "hermes" }),
+      ]);
+      const node = useCanvasStore
+        .getState()
+        .nodes.find((n) => n.id === "ws-default");
+      expect(node?.data.provisionTimeoutMs).toBeNull();
+      // And the resolver still returns hermes' profile value when
+      // no override is supplied — proves the fall-through stays intact.
+      expect(
+        provisionTimeoutForRuntime("hermes", {
+          provisionTimeoutMs: node?.data.provisionTimeoutMs ?? undefined,
+        }),
+      ).toBe(RUNTIME_PROFILES.hermes.provisionTimeoutMs);
+    });
+
+    it("server override wins over runtime profile via the resolver path the component uses", () => {
+      // Mirrors ProvisioningTimeout.tsx:144 where node.provisionTimeoutMs
+      // is passed as overrides — verifies the resolver respects it
+      // even when the runtime has its own profile entry.
+      const override = 30_000;
+      expect(
+        provisionTimeoutForRuntime("hermes", {
+          provisionTimeoutMs: override,
+        }),
+      ).toBe(override);
+      // Sanity — the runtime profile would have been much larger.
+      expect(RUNTIME_PROFILES.hermes.provisionTimeoutMs).toBeGreaterThan(
+        override,
+      );
+    });
+  });
 });
 });
@@ -478,6 +478,9 @@ export function buildNodesAndEdges(
         needsRestart: false,
         budgetLimit: ws.budget_limit ?? null,
         budgetUsed: ws.budget_used ?? null,
+        // #2054 — server-declared per-workspace provisioning timeout.
+        // Falls through to the runtime profile when null/absent.
+        provisionTimeoutMs: ws.provision_timeout_ms ?? null,
       },
     };
     if (hasParent) {
@@ -92,6 +92,12 @@ export interface WorkspaceNodeData extends Record<string, unknown> {
   budgetLimit: number | null;
   /** Cumulative USD spend. Present when the platform tracks spend (issue #541). */
   budgetUsed?: number | null;
+  /** Per-workspace provisioning-timeout override in milliseconds (#2054).
+   * Sourced server-side from the workspace's template manifest at provision
+   * time. null/absent = fall through to runtime profile + default in
+   * @/lib/runtimeProfiles. Lets a slow runtime declare its cold-boot
+   * expectation without a canvas release. */
+  provisionTimeoutMs?: number | null;
 }
 
 export type PanelTab = "details" | "skills" | "chat" | "terminal" | "config" | "schedule" | "channels" | "files" | "memory" | "traces" | "events" | "activity" | "audit";
@@ -122,6 +122,13 @@ export interface WorkspaceData {
   budget_limit: number | null;
   /** Cumulative USD spend for this workspace. Present when the platform tracks spend. */
   budget_used?: number | null;
+  /** Server-declared provisioning-timeout override in milliseconds (#2054).
+   * Sourced from the workspace's template manifest at provision time —
+   * lets a slow runtime declare its cold-boot expectation without a
+   * canvas release. Falls through to the per-runtime profile in
+   * `@/lib/runtimeProfiles` when absent (the default behavior for any
+   * template that hasn't yet declared the field). */
+  provision_timeout_ms?: number | null;
 }
 
 let socket: ReconnectingSocket | null = null;