Files
molecule-core/canvas/src/lib/api.ts
hongming 34179e64a3
ci-arm64-advisory / fast-checks (pull_request) Waiting to run
Lint shellcheck (arm64 pilot) / shellcheck-arm64 (pilot) (pull_request) Successful in 8s
Block internal-flavored paths / Block forbidden paths (pull_request) Successful in 10s
CI / Python Lint & Test (pull_request) Successful in 7s
CI / Detect changes (pull_request) Successful in 9s
E2E API Smoke Test / detect-changes (pull_request) Successful in 9s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (pull_request) Successful in 7s
E2E Chat / detect-changes (pull_request) Successful in 11s
E2E Staging Canvas (Playwright) / detect-changes (pull_request) Successful in 13s
E2E Peer Visibility (literal MCP list_peers) / E2E Peer Visibility (local) (pull_request) Successful in 55s
E2E Staging SaaS (full lifecycle) / pr-validate (pull_request) Successful in 47s
Harness Replays / detect-changes (pull_request) Successful in 3s
Handlers Postgres Integration / detect-changes (pull_request) Successful in 7s
Lint curl status-code capture / Scan workflows for curl status-capture pollution (pull_request) Successful in 5s
Lint no tenant GITEA or GITHUB token write / Scan for repo-host token write into tenant workspace surface (pull_request) Successful in 12s
Lint forbidden tenant-env keys / Scan workspace_secrets writers for forbidden env keys (pull_request) Successful in 16s
lint-continue-on-error-tracking / lint-continue-on-error-tracking (pull_request) Successful in 1m16s
Lint pre-flip continue-on-error / Verify continue-on-error flips have run-log proof (pull_request) Successful in 1m15s
lint-required-workflows-docker-host-pinned / Lint docker-host pin on docker-touching workflows (pull_request) Successful in 5s
lint-required-no-paths / lint-required-no-paths (pull_request) Successful in 1m3s
Secret scan / Scan diff for credential-shaped strings (pull_request) Successful in 6s
lint-required-context-exists-in-bp / lint-required-context-exists-in-bp (pull_request) Successful in 1m28s
gate-check-v3 / gate-check (pull_request) Successful in 4s
qa-review / approved (pull_request) Successful in 4s
security-review / approved (pull_request) Successful in 3s
sop-checklist / na-declarations (pull_request) N/A: (none)
sop-checklist / all-items-acked (pull_request) Successful in 3s
sop-checklist / review-refire (pull_request) Has been skipped
Lint workflow YAML (Gitea-1.22.6-hostile shapes) / Lint workflow YAML for Gitea-1.22.6-hostile shapes (pull_request) Successful in 1m19s
sop-tier-check / tier-check (pull_request) Successful in 4s
E2E Staging External Runtime / E2E Staging External Runtime (pull_request) Successful in 5m10s
E2E Staging SaaS (full lifecycle) / E2E Staging SaaS (pull_request) Successful in 5m2s
Ops Scripts Tests / Ops scripts (unittest) (pull_request) Successful in 1m34s
CI / Shellcheck (E2E scripts) (pull_request) Successful in 24s
E2E API Smoke Test / E2E API Smoke Test (pull_request) Successful in 1m46s
E2E Chat / E2E Chat (pull_request) Successful in 4s
E2E Staging Canvas (Playwright) / Canvas tabs E2E (pull_request) Successful in 2s
Harness Replays / Harness Replays (pull_request) Successful in 4s
Handlers Postgres Integration / Handlers Postgres Integration (pull_request) Successful in 1m58s
CI / Platform (Go) (pull_request) Successful in 5m11s
CI / Canvas (Next.js) (pull_request) Successful in 7m3s
CI / all-required (pull_request) Successful in 18m55s
CI / Canvas Deploy Reminder (pull_request) Has been skipped
audit-force-merge / audit (pull_request) Successful in 6s
fix: require workspace name confirmation on delete
2026-05-24 21:08:04 -07:00

194 lines
8.8 KiB
TypeScript

import { getTenantSlug } from "./tenant";
// When NEXT_PUBLIC_PLATFORM_URL is set to "" (empty string), the canvas
// uses relative paths — correct for the combined tenant image where Go
// platform + canvas run on the same port via reverse proxy. The `??`
// operator preserves "" as a valid value; `||` would fall through to
// the localhost default.
export const PLATFORM_URL =
process.env.NEXT_PUBLIC_PLATFORM_URL ?? "http://localhost:8080";
// 35s is long enough for the slowest server-side path (EIC SSH
// tunnel for tenant EC2 file operations, bounded server-side by
// `eicFileOpTimeout = 30 * time.Second` in
// workspace-server/internal/handlers/template_files_eic.go) so the
// canvas surfaces the server's real error instead of aborting first
// with a generic timeout. Shorter values caused "Save & Restart" to
// time out at the client before the backend returned its 5xx. The
// abort still propagates through AbortController so React components
// can render a retry affordance. Callers that know an endpoint is
// intentionally slow (org import walks a tree of workspaces with
// server-side pacing) can pass `timeoutMs` to override.
const DEFAULT_TIMEOUT_MS = 35_000;
export interface RequestOptions {
timeoutMs?: number;
headers?: Record<string, string>;
}
/**
* Build the platform auth header set used by every authenticated fetch
* from the canvas. Returns a fresh object so callers can mutate (e.g.
* append `Content-Type` for JSON requests, omit it for FormData).
*
* SaaS cross-origin shape:
* - `X-Molecule-Org-Slug` — derived from `window.location.hostname`
* by `getTenantSlug()`. Control plane uses it for fly-replay
* routing. Empty on localhost / non-tenant hosts — safe to omit.
* - `Authorization: Bearer <token>` — `NEXT_PUBLIC_ADMIN_TOKEN` baked
* into the canvas build (see canvas/Dockerfile L8/L11). Required by
* the workspace-server when `ADMIN_TOKEN` is set on the server side
* (Tier-2b AdminAuth gate, wsauth_middleware.go ~L245). Empty when
* no admin token was provisioned — the Tier-1 session-cookie path
* handles that case via `credentials:"include"`.
*
* Why a shared helper: the two-line "read env, attach bearer; read
* slug, attach header" pattern was duplicated across `request()` and
* 7 raw-fetch callsites (chat uploads/download + 5 Attachment*
* components) before this consolidation. A new poller or raw fetch
* that forgets one of the two headers silently 401s against
* workspace-server when ADMIN_TOKEN is set — the exact bug shape
* called out in #178 / closes the post-#176 self-review gap.
*
* Callers that want JSON Content-Type should spread this and add it
* themselves; FormData callers should NOT add Content-Type (the
* browser sets the multipart boundary). Centralizing the auth pair
* but leaving Content-Type up to the caller is the minimum viable
* shared shape.
*/
export function platformAuthHeaders(): Record<string, string> {
const headers: Record<string, string> = {};
const slug = getTenantSlug();
if (slug) headers["X-Molecule-Org-Slug"] = slug;
const adminToken = process.env.NEXT_PUBLIC_ADMIN_TOKEN;
if (adminToken) headers["Authorization"] = `Bearer ${adminToken}`;
return headers;
}
async function request<T>(
method: string,
path: string,
body?: unknown,
retryCount = 0,
options?: RequestOptions,
): Promise<T> {
// JSON-bodied request — Content-Type is JSON. Auth pair comes from
// the shared helper; see its doc comment for the SaaS-shape rationale.
const headers: Record<string, string> = {
"Content-Type": "application/json",
...platformAuthHeaders(),
...(options?.headers ?? {}),
};
// Re-read slug locally for the 401 handler below — `headers` already
// has it, but the 401 branch needs the bare value to gate the
// session-probe + redirect logic on tenant context.
const slug = getTenantSlug();
const res = await fetch(`${PLATFORM_URL}${path}`, {
method,
headers,
body: body ? JSON.stringify(body) : undefined,
credentials: "include",
signal: AbortSignal.timeout(options?.timeoutMs ?? DEFAULT_TIMEOUT_MS),
});
// Transient rate-limit recovery. A single IP bucket can momentarily
// spike on page load (several panels hydrate simultaneously). Instead
// of bubbling up a 429 that blanks the Canvas, wait the
// Retry-After window and try once — any further 429 surfaces normally.
// GET / idempotent methods only; never auto-retry mutations.
if (res.status === 429 && retryCount === 0 && method === "GET") {
const retryAfterHeader = res.headers.get("Retry-After");
const retryAfter = retryAfterHeader ? parseInt(retryAfterHeader, 10) : NaN;
const delayMs = Number.isFinite(retryAfter) ? Math.min(retryAfter, 20) * 1000 : 2000;
await new Promise((resolve) => setTimeout(resolve, delayMs));
return request<T>(method, path, body, retryCount + 1, options);
}
if (res.status === 401) {
// Distinguish "session is dead" from "this endpoint refused this
// token." Old behaviour blanket-redirected on every 401, so a
// single transient 401 from a workspace-scoped endpoint
// (/workspaces/:id/peers, /plugins, etc. that need a workspace
// token rather than the tenant admin bearer) yanked the user
// back to AuthKit even when their session was perfectly fine.
// That broke the staging-tabs E2E for the entire 2026-04-25
// night; #2073/#2074 worked around the symptom in the test by
// mocking 401→200 for every fetch, but the user-facing bug
// stayed.
//
// The canonical "session is dead" signal is /cp/auth/me
// returning 401. For any 401 on a non-auth path, probe
// /cp/auth/me before deciding to redirect:
// - probe 401 → session is actually dead → redirect
// - probe 200 → session is fine, the endpoint just refused
// our specific token → throw a real error,
// caller renders an error state
// - probe network error → assume session-fine (conservative;
// better to throw than to redirect on a
// transient probe failure)
//
// Self-hosted / localhost / reserved subdomains still throw
// without redirecting (slug is empty in those cases) — same
// policy as before.
const isAuthPath = path.startsWith("/cp/auth/");
let sessionDead = isAuthPath;
if (!isAuthPath && slug) {
try {
const probe = await fetch(`${PLATFORM_URL}/cp/auth/me`, {
credentials: "include",
signal: AbortSignal.timeout(5000),
});
sessionDead = probe.status === 401;
} catch {
// Probe failed (network/timeout) — fall through to throw.
}
}
if (sessionDead && slug) {
const { redirectToLogin } = await import("./auth");
redirectToLogin("sign-in");
throw new Error("Session expired — redirecting to login");
}
throw new Error(`API ${method} ${path}: 401 ${await res.text()}`);
}
if (!res.ok) {
const text = await res.text();
// Recognise the platform's structured "datastore unreachable"
// shape (returned by wsauth_middleware.abortAuthLookupError when
// Postgres/Redis is down). Surface as a typed error so callers
// can render a dedicated diagnostic instead of a generic toast.
if (res.status === 503 && text) {
try {
const parsed = JSON.parse(text) as { code?: string; error?: string };
if (parsed.code === "platform_unavailable") {
throw new PlatformUnavailableError(parsed.error || "platform datastore unavailable");
}
} catch (err) {
// Re-throw the typed error if that's what we just constructed.
// JSON.parse failures fall through to the generic Error below.
if (err instanceof PlatformUnavailableError) throw err;
}
}
throw new Error(`API ${method} ${path}: ${res.status} ${text}`);
}
return res.json();
}
/** Thrown when the platform reports its datastore (Postgres/Redis) is
* unreachable. Surface with a dedicated diagnostic UI rather than a
* generic API-error toast — the user's next action is to check local
* services, not to retry the API call. */
export class PlatformUnavailableError extends Error {
readonly code = "platform_unavailable" as const;
constructor(message: string) {
super(message);
this.name = "PlatformUnavailableError";
}
}
export const api = {
get: <T>(path: string, options?: RequestOptions) => request<T>("GET", path, undefined, 0, options),
post: <T>(path: string, body?: unknown, options?: RequestOptions) => request<T>("POST", path, body, 0, options),
patch: <T>(path: string, body?: unknown, options?: RequestOptions) => request<T>("PATCH", path, body, 0, options),
put: <T>(path: string, body?: unknown, options?: RequestOptions) => request<T>("PUT", path, body, 0, options),
del: <T>(path: string, options?: RequestOptions) => request<T>("DELETE", path, undefined, 0, options),
};